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A ppendix "O" 
FILTER 



#include <stdio.h> 

^include < signal. h> 
5 #include <ctype.h> 

^include <unistd.h> 

#include < string. h> 

^include <sys/stat.h> 

^include <math.h> 
10 ^include "parseopt.h" 

include "utl^str.h" 

^include "utl_mem.h" 

^include "utljlle.h" 

#include "utl_math.h" 
15 include "ct.h" 

#include "ct_expr.h" 

#include "ct_proto.h" 

^include "import_proto.h" 

^include "io^fprint.h" 

20 #include "dservTypes.h" 

/* 

** 
** 

25 ** Abstract : Function parses range field string for ADS design programs. 
** It takes a string of the form 

** "logp -1.0 8.0 MW 200 500 price 0 12.50" and fills in the 

** global array RangeFields. 

** 

30 ** 
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** Usage : 
** 

** Returns : 1 on success, 0 for failure. 
** 

5 ** Algorithms : None. 
** 

** 

** 

**-E: 
10 */ 

int ParseRangeVar(rangeVar,numRangeFieldsAllocated,numRangeFields,rangeFields) 
char *rangeVar ; 
int *numRangeFieldsAllocated ; 
int *numRangeFields ; 
15 struct RangeStruct **rangeFields; 

{ 

static int stat = 0 ; 
char "buffer = (char *)NULL ; 
char *name ; 
20 char *low ; 
char *high ; 
int i ; 

*numRangeFieldsAllocated = 0 ; 
*numRangeFields = 0 ; 
25 *rangeFields = (struct RangeStruct *)NULL ; 

if ( ! (buffer = UTL_STR_SAVE(rangeVar)) ) 
goto Failure ; 

name = strtok(buffer," "); 
while ( name ) 
30 { 
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if ( !(low = strtok(NULL, M ")) ) 

goto UnableToParse ; 
if ( !(high = strtok(NULL," ")) ) 

goto UnableToParse ; 
5 if ( *numRangeFields > = *numRangeFieldsAUocated ) 

{ 

if ( ! *rangeFields ) 

{ 

if (!(*rangeFields = (struct RangeStruct 

10 *)UTL_MEM_CALLOC( 
ALLOCATEINCREMENT, 

sizeof(struct RangeStruct)))) 
goto Failure ; 

15 else 

*numRangeFieldsAllocated = 

ALLOC ATE_INCREMENT ; 
} 

else 

20 { 

if (!( *rangeFields = (struct RangeStruct 

*)UTL_MEM_RECALLOC( 

rangeFields, 

(*numRangeFields Allocated *sizeof (struct RangeStruct)), 
25 ((*numRangeFieldsAllocated + ALLOCATEJNCREMENT) * 

sizeof(struct RangeStruct)) )) ) 
goto Failure ; 

else 

*numRangeFieldsAllocated + = 

30 ALLOCATE INCREMENT ; 



} 



} 
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(*rangeFields)[*numRangeFields].RangeFieldName = 
UTL_STR_S A VE(name) ; 

(*rangeFields)[*numRangeFields].lowValue = atof(low); 
(*rangeFields)[*numRajigeFields].highValue = atof(high); 
5 (*numRangeFields) + + ; 

name = strtok(NULL, H "); 

} 

stat = 1 ; 
goto Cleanup ; 

10 UnableToParse: 

fprintf(stderr, " Unable to parse -rangevar %s\n",rangeVar); 
stat = 0 ; 
goto Cleanup ; 
Failure : 
15 stat = 0 ; 

goto Cleanup ; 
CleanUp : 

if ( buffer ) 

UTL_MEM_FREE(buffer) ; 
20 return stat ; 
} 

/* 

** + E: 
** 

25 ** 

** Abstract : Function parses one of field string for ADS design programs. 
** It takes a string of the form 

** "supplier Aldrich,Sigma,FIuka,SALOR taste SWEET,Salty" 

** global array OneOfValues. 

30 ** 
** 
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** Usage 
** 

** Returns 

5 ** Algorithms 
** 

** 

** 

**-E: 
10 */ 

int ParseOneOfVa^ 

char *oneOfVar ; 

int *numOneOfFieldsAllocated ; 

int *numOneOfFields ; 
15 struct OneOfStruct **oneOfValues; 

{ 

static int stat = 0 ; 

char *buffer = (char *)NULL ; 

char *name ; 
20 char *choices ; 

char *choice ; 

int i ; 

int j ; 

char *cp ; 
25 char *end ; 

*numOneOfFieldsAllocated = 0 ; 

*numOneOfFields = 0 ; 

(*oneOfValues) = (struct OneOfStruct *)NULL ; 

if ( ! (buffer = UTL_STR_SAVE(oneOfVar)) ) 
30 goto Failure ; 

/* 

** Start off by reading the field name . 



: 1 on success, 0 for failure. 
: None. 
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*/ 

name = strtok(buffer," "); 

while ( name ) 

{ 

5 if ( *numOneOfFields > = *numOneOfFieldsAllocated ) 

{ 

if ( !(*oneOfValues) ) 
{ 

if (!(*oneOfValues = (struct OneOfStmct 

10 *)UTL_MEM_CALLOC( 

ALLOCATEJNCREMENT, 

sizeof(struct OneOfStmct)))) 
goto Failure ; 

15 else 

*numOneOfFieldsAllocated = 

ALLOCATEJNCREMENT ; 
} 

else 

20 { 

if (!( *oneOfValues = (struct OneOfStmct 

*)UTL_MEM_RECALLOC( 

*oneOfValues, 

(*numOneOfFieldsAllocated *sizeof(struct OneOfStmct)), 
25 ((*numOneOfFieldsAIIocated + ALLOCATEJNCREMENT) 

sizeof(struct OneOfStmct)) )) ) 
goto Failure ; 

else 

*numOneOfFieldsAllocated + = 

30 ALLOCATE INCREMENT ; 



} 
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(*oneOfValues)[*numOneOfFields].OneOfFieldName = 

UTL_STR_SAVE(name) ; 

(*0neOfValues)[*numOneOfFields].numValues = 0 ; 
(*oneOfValues)[*numOneOfFields].num Values Alloc = 

5 ALLOCATEJNCREMENT ; 

if ( !((*oneOfValues)[*numOneOfFields]. values = (char **) 



UTL_MEM_CALLOC(ALLOCATE_INCREMENT, 



sizeof(char *)) ) ) 
10 goto Failure ; 

/* 

** Now look at the choices this field could have. 
*/ 

choices = strtok(NULL," "); 
15 if ( ! choices ) 

goto UnableToParse ; 
choice = strtok(choices,","); 
while ( choice ) 

{ 

20 if ( (*oneOfValues)[ s|e numOneOfFields].num Values > = 

( stc oneOfValues)[*numOneOfFields].numValuesAlloc ) 

{ 

if ( !((*oneOfValues)[*numOneOfFields]. values = (char **) 

UTL_MEM_RECALLOC((*oneOfValues)[*numOneOfFields].vaIues, 

25 ( 
(*oneOfValues)[*numOneOfFields] . numValuesAlloc * 

sizeof(char *)), 
( ((*oneOfValues)[*numOneOfFields]. numValuesAlloc + 

ALLOCATE JNCREMENT ) 

30 *sizeof(char *)) ) )) 

goto Failure ; 




507 

(*oneOfValues)[*numOneOfFields] .numValuesAlloc + = 

ALLOCATEJNCREMENT; 

} 

(*oneOfValues)[*numOneOfFields].values[(*oneOfValues)[*numOneOfFields].num Values] 
5 = UTL_STR_SAVE(choice); 

(*oneOfValues)[*numOneOfFields].numValues+ + ; 
end = choice + strlen(choice) + 1 ; 
choice = strtok(NULL,","); 

} 

10 (*numOneOfFields) + + ; 

name = strtok(end," "); 

} 

stat = 1 ; 
goto CleanUp ; 

15 UnableToParse: 

fprintf(stderr, "Unable to parse -oneof %s\n",oneOA^ar); 
stat = 0 ; 
goto CleanUp ; 
Failure : 
20 stat = 0 ; 

goto Cleanup ; 
CleanUp : 

if ( buffer ) 

UTL_MEM_FREE(buffer) ; 
25 return stat ; 
} 

/* 

**+E: 
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** 
** 

** Abstract : Function parses a line from the input file and extracts 

** out any rangevar or oneof fields. 

5 ** 
** 

** Usage : 
** 

** Returns : Always returns 1 ; 
10 ** 

** Algorithms : None. 

** 

*# 
** 

15 **-E: 
*/ 

int ReadLine Attributes(line, numRangeFields , rangeValues , rangeFields , numOneOfFields , 

oneOfValues,oneOfFields) 

char *line ; 
20 int numRangeFields ; 

float **range Values; 

struct RangeStruct *rangeFields; 

int numOneOfFields ; 

int * *oneOfValues ; 
25 struct OneOfStruct *oneOfFields; 

{ 

int i ; 
int j ; 
char *cp ; 

30 /* 

** Now read in the salar selection fields if any. 
*/ 

if ( numRangeFields ) 
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if ( !(*rangeValues = (float *)UTL_MEM_CALLOC(numRangeFields, 



sizeof(float)) ) ) 
5 return 0 ; 

} 

if ( numOneOfFields ) 
{ 

if ( !(*oneOfValues = (int *)UTL_MEM_CALLOC(numOneOfFields, 

10 

sizeof(int)) ) ) 

return 0 ; 

} 

for ( i = 0 ; i < numRangeFields ; i++ ) 
15 { 

if ( ( cp = strstr(line,rangeFields[i].RangeFieldName ) ) ) 
{ 

/* 

** Move past the logp= to get the value of this field, if the value is 
20 ** a ';' then it is a missing value. 
*/ 

cp = cp + strlen(rangeFields[i].RangeFieldName) + 1 ; 

if ( *cp == ';' ) 

(*rangeValues)[i] = MISSING_FLOAT_VALUE ; 

25 else 

(*rangeValues)[i] = atof(cp); 

} 

else 

{ 

30 (*rangeValues)[i] = MISSING_FLOAT_VALUE ; 

} 

} 

/* 
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** Parse the -oneof field, we are looking for something looking like 

** " supplier =Aldrich" 

*/ 

for ( i = 0 ; i < numOneOfFields ; i++ ) 
5 { 

if ( ( cp = strstr(line,oneOfFields[i].OneOfFieldName ) ) ) 

* 

{ 

cp = cp + str len(oneOfPields[i].OneOfFieldName) + 1 ; 
if ( *cp == ';' ) 

1Q (*oneOfValues)[i] = MISSING_INT_VALUE ; 

else 
{ 

for ( j = 0 ; j < oneOfFields[i].num Values ; j + + ) 
{ 

15 if ( UTL_STR_NCMP_NOCASE(cp, 

oneOfFields[i] . valuesfj] , 
strlen(oneOfFields[i].valueslj])) ==0) 

{ 

(*oneOfValues)[i] = j ; 

20 break; 

} 

} 

if ( j = = 0 neOfFields[i].num Values ) 

(*oneOfValues)[i] = NOT_A_MATCH_VALUE 

25 } 
} 

else 

(*oneOfValues)[i] = MISSING_INT_VALUE ; 



30 } 



/* 
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**+E: 
#* 

** 

** Abstract : Function Checks to see if the given product passes the 
5 ** user supplied filters. 

** 
** 

** Usage : 
** 

10 ** Returns : 1 if the product is not within range, 0 otherwise. 
** 

** Algorithms : None. 

** 

** 

15 **-E: 
*/ 

static int 

NotWithinScalarRange(firsandex ? secondIndex,numRangeFields,rangeV^ 
ues - Y02 ? rangeFields,numOneOfFields,oneOfValues_Y01,oneOfValues,Y02 ) on 

20 int firstlndex ; /* Index into Y_01 data */ 

int secondlndex ; /* Index into Y_02 data */ 

int numRangeFields ; 

float **rangeValues_Y01 ; 

float **rangeValues_Y02 ; 
25 struct RangeStruct *rangeFields; 

int numOneOfFields ; 

int **oneOfValues_Y01 ; 

int **oneOfValues_Y02 ; 

struct OneOfStruct *oneOfValues ; 
30 { 

int i ; 

float total ; 
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/* 

** First check the range values. 
*/ 

for ( i = 0 ; i < numRangeFields ; i++ ) 

5 { 
/* 

** If one of the regions has a missing value, then we do not filter this 

** product. 

*/ 

10 if ((( rangeValues_Y01[firstIndex][i] - MISSING_FLOAT_VALUE ) 

= = SMALL_FLOAT ) j | 

(( rangeValues_Y02[secondIndex][i] - MISSING_FLOAT_VALUE ) 
= = SMALL_FLOAT ) ) 
return 0 ; 

15 total =rangeValues_Y01[firstIndex][i] + rangeValues_Y02[secondIndex][i]; 

if ((total > rangeFields[i]. high Value ) || 
(total < rangeFields[i].lowValue ) ) 

{ 

return 1 ; 

20 } 
} 

for ( i = 0 ; i < numOneOfFields ; i++ ) 
{ 

/* 

25 ** If the value is missing then we dont mess with this guy. 
*/ 

if ( ( oneOfValues_Y01[firstIndex][i] == MISSING_INT_VALUE ) | | 
( oneOfValues_Y02[secondIndex][i] = = MISSINGJNTVALUE ) ) 
return 0 ; 

30 /* 

** If any of the regions in the product does not match the selection 
** criteria, then the product is rejected. 
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*/ 

if ( ( O neOfValues_Y01[firstIndex][i] == NOT_A_MATCH_VALUE ) j | 
(oneOfValues_Y02[secondIndex][i] = = NOT_A_M ATCH_V ALUE ) ) 
return 1 ; 

5 } 

return 0 ; 

} 

/* 

**+E: 

10 ** 
** 

** Abstract 
** 

#* 

** Usage 
*# 

** Returns 
** 

20 ** Algorithms 
** 
** 

*/ 

25 int FilterProducts(inputInfo,rangeData,oneOfData,numFiltered) 
struct InputlnfoStruct *inputInfo ; 
struct RangelnfoStruct *rangeData ; 
struct OneOflnfoStruct *oneOfData ; 
int *numFiltered ; 

30 { 

int numProducts ; 



: Function zapps products who are not within the user supplied 
selection criteria. 



1 on success or 0 on failure. 



None. 
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int i ; 

int index 1 ; 
int index2 ; 
int Y01_Offset = 0 ; 
5 int Y02_pffset - 0 ; 
int k ; 

*numFiltered = 0 ; 



for ( k = 0 ; k < inputlnfo- > totallnputs ; k++ ) 



10 



numProducts 



= inputlnfo- >Y_01_Length[k] * inputlnfo- > Y_02_Length[k] 



15 



20 



25 



/* 
/* 



for ( i = 0 ; i < numProducts ; i++ ) 

{ indexl = i / inputlnfo- > Y_02_Length[k] ; /*Y_01 index */ 
index2 = i % inputlnfo- >Y_02_Length[k] ; /*Y_02 index */ 

if ( NotWithinScalarRange(indexl, 
index2, 

rangeData- > numRangeFields , 
rangeData->rangeValues_Y01 + YOl.Offset , 
rangeData- > rangeValues_Y02 + Y02_Offset , 
rangeData- > rangeFields, 
oneOfData->numOneOfFields , 
O neOfData->oneOfValues_Y01 + Y01_Offset , 
O neOfData->oneOfValues_Y02 + Y02_Offset , 
oneOfData- > oneOfFields )) 

{ 

ZaplnputProduct(k, i,0,0); 
FlagProduct(dead_Products,0,0,i + bitMapStartPoint[k] ); */ 

someLeft--; 
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remaininglnput[k]~ ; */ 

*numFiltered += 1 ; 

} 

} 

Y01_Offset += inputInfo->Y_01_Length[k] 
Y02_Offset += inputInfo->Y_02_Length[k] 

} 

return 1 ; 

} 
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A ppendix "R" 



/*+M: dbcsln_bitset.c 



** Author Date Description 



** Fred Soltanshahi 07-30-96 Routines to access a ChemSpace 

** bitset. 
** Entry Points : 

** CS_PRDCT_BITSET_OPEN() -- Opens a CS bitset file. 

** CS PRDCT_BITSET_CLOSE()« Closes and cleansup after a bitset file. 

** CS_PRDCT_BITSET_WRITE()-- Writes a bitset file. 

** CS_PRDCT_BITSET_CREATE()~ Creates a bitset in memory. 

** CS_PRDCT_BITSET_DUMP() - Dumps the content of the file. 

** CS_PRDCT_BITSET_GETHITS— Returns the indexes for the requested 

** number of hits. 

** CS PRDCT BITS ET_S ETBITS 0 - Copies a raw bitset into ChemSpace 

** compressed bitset. 

** CS_PRDCT_BITSET_INDEXES_TO_INDEX -- get bitset index from 

Y_01,Y_02 etc. 

** CS_PRDCT_BITSET_TO_RAWO - Copies a ChemSpace bitset back into 

** a raw bitset 

** cS_PRDCT_BITSET_CONCAT_RAW() -- Copies a ChemSpace bitset 

** into part of a raw bitset 

** CS_PRDCT_BITS ETS ELECTED () -- returns totalSelected 

** CS PRDCT_BITSET_REVEAL() -- Returns elements of hidden bitset 

** data structure to external program. 

** CS_PRDCT_BITSET_SET_PRD_BIT0 - Sets a bit in ChemSpace bitset. 

** CS_PRDCT_BITSET_GET_STATS() -- Returns totals for the bitset. 

** CS PRDCT_BITSET_CORE_INFO() -- Gets core information from a bitset 
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file. 

CS_PRDCT_MSTR_CORE_INFO0 - Gets core information from a master hie. 
CS_PRDCT_BITSET_GETHITSO -- Gets the indexes for the requested set of 
** hits. 
5 ** C S_PRDCT_BITSET_CREATE_BIT_STRINGO - Creates a compressed 

bitstring in memory 

CS_PRDCT_BITSET_DESTROY_BIT_STRING0-- Deletes a compressed bitset 
in memory. 

10 *****************************************— ^ 



-M*/ 



#include <stdio.h> 

#include <unistd.h> 
15 #include <string.h> 

^include <math.h> 

#include "utl_str.h" 

^include "utl_mem.h" 

extern char *GetFullPathNameO; 
20 extern char *basename(); 



#define VERSION.NUMBER 1.001 

#define ALLOCATION_F ACTOR 1.25 /*Extra room in each variation site for growth*/ 
#define MAX_VARIATION_SITES 255 



extern int setbits[8] ; 
25 extern int nbits[256] ; 



typedef struct MasterFileStruct 
{ 

char *masterFilePathName ; 
int masterRecNo ; 
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char *prefixForFiles; 

int numVariationSites ; 

int numberOfMissingBits; 

int Ibits; 
5 char *corefilePathName; 

int startCore; 

char *fingerFileName; 

int fingerOffset ; 

char **x_FileName ; 
10 char **reagentInfo ; 

} MasterFileStruct ; 
typedef struct ProgramlnfoStruct 

{ 

char *programName ; 
15 int bufferSize ; 

int *buffer ; 
} ProgramlnfoStruct ; 
typedef struct BitSetFileStruct 

{ 

20 struct MasterFileStruct masterFilelnfo ; 

struct ProgramlnfoStruct programlnfo ; 
int numVariationSites ; 
int *actuallSizes ; 
int *allocSizes ; 
25 int *numFragsInEachSite ; 

int totalSelected ; 
int *bitset ; 
int firstHitAddress ; 
unsigned char **fragmentBitset; 
30 } BitSetFileStruct ; 
typedef struct 

{ 

struct BitSetFileStruct *bitset; 
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int numVariations; 
int *choices; 

void •caUudata; /* callback callback's udata •/ 

int (*funcptr)(void *udata, int numVariants, int Choices ); 
5 int totalExcluded; 

} BIT_TRACKING; 

static Int RangeCallback ( void *udata, int startRange, int endRange ); 

static int BitSetAddressToIndexes(struct BitSetFileStruct *bitset,int address,int **indxs,int 
*notUsedFlag) 

10 { 

int numVariations; 

int *allPtr; /* alloced Sizes Ptr */ 

int *actPtr; /* actual sizes Ptr */ 

int *choices; 
15 int *chPtr ; 
int x ; 
int i ; 

int skip = 1 ; 
int skipcnt = 0; 
20 numVariations = bitset- > numVariationSites; 

x = address ; 

/* 

** If the caller did not give us space to put in the indexes, allocate our 
** own. 
25 */ 

if ( ! (*indxs) ) 

(*indxs) = (int *) UTL_MEM_CALLOC(numVariations, sizeof(int) ); 

choices = *indxs ; 
if ( notUsedFlag) 
30 *notUsedFlag = 0 ; 

for ( allPtr = bitset- > allocSizes + (numVariations - 1), 

actPtr = bitset- > actuallSizes + (numVariations 



1), 



520 

chPtr = choices + (numVariations 
i = numVariations; 



1- ; 

allPtr-, actPtr--, chPtr- ) 

{ 

*chPtr = x % *allPtr; 
if ( *chPtr > = *actPtr ) 

{ 

if ( Iskipcnt ) 
{ 

skipcnt = 1; 

skip *= ( *allPtr- "chPtr ); 

} 

1* If we are rejecting things out of bounds of the actual sizes then we 
** are out of here. 

*/ 

if ( notUsedFlag ) 

{. 

*notUsedFlag = 1 ; 
break; 

} 

} 

x = x / *allPtr; 
if ( Iskipcnt ) 

skip *= *allPtr; 

} 

if ( Iskipcnt ) 

skip = 0; 
return skip; 
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static int TestDump(DumpCode, Parameter, Dumplnt) 
int DumpCode ; 
void *Parameter ; 
int Dumplnt ; 
5 { 

FILE *File = (FILE *)Parameter ; 

fprintf(File, "%d\n", Dumplnt); 

} 



static int CalculateAllocatedSizes(int numSites,int *sizes,int *allocSizes) 

10 { 

int i ; 

for ( i = 0 ; i < numSites ; i+ + ) 

allocSizes[i] = sizes[i] * ALLOC ATION_F ACTOR ; 

} 

15 static int InitO 
{ 

static int firstTime = 1 ; 
int i ; 

if ( firstTime ) 

20 { 

for (i=0;i<8;i++) setbits[i] = ( 1 < < i) & 255; 
firstTime = 0 ; 

for (i=0;i<256;i++) nbits[i] = (i&D + 0&2)/2 + (i&4)/4 + (i&8)/8 + 

(i&16)/16 + (i&32)/32 

25 + (i&64)/64 + (i&128)/128 ; 
} 

} 



int setbits_nbits_Init() 
{ 
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InitO; 
return 1; 

} 



void *CreateCompressedBitSet(bitset,offset,numVariations,siz^ 

5 int *bitset ; 

int offset ; 

int numVariations ; 

int *sizes ; 

int *allocSizes ; 
10 { 



void 


*compressed ; 


int 


byte ; 


int 


bit ; 


int 


size ; 


15 int 


total ; 


int 


i ; 


int 


index 1 ; 


int 


index2 ; 


int 


newPos ; 


20 int 


rowLength ; 


unsigned char *bs = (unsigned char *)bitset 




Init(); 



#if 0 
/* 

25 ** Always calculate the allocated sizes, we are the only one who can set this. 
*/ 

if ( allocSizes[0] < = 0 ) 

CalculateAllocatedSizes(numVariations,sizes,allocSizes); 

tfendif 
30 /* 
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10 



** Make the allocated size the same as the real size. 
*/ 

for ( i = 0 ; i < num Variations ; i++ ) 

allocSizes[i] = sizes[i] ; 
size = allocSizes[0] ; 
for ( i = 1 ; i < num Variations ; i++ ) 

size *= allocSizes[i] ; 
total = sizes[0] ; 

for ( i = 1 ; i < num Variations ; i++ ) 

total *= sizes[i] ; 
if ( sizes[0] ! = allocSizes[0] ) 

{ 

if ( !( compressed = (void *)IHBDeclare( size ) ) ) 
goto AddTraceback ; 

15 /* 

** Set the bitset if the caller supplied us with one. 
*/ 

if ( bitset ) 
{ 

rowLength = sizes[l] ; 
for ( i = 0 ; i < total ; i++ ) 

{ 

index 1 = i / rowLength ; 
index2 = i % rowLength ; 
byte = ( i + offset ) / 8 ; 
bit = ( i + offset ) % 8 ; 
if ( bs[byte] & setbitspit] ) 

{ 

newPos = ( index 1 * allocSizes[l] ) + index2 ; 
IHBSet(compressed, newPos ); 

i 

} 

IHBOptimize(compressed); 



20 



25 



30 
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10 



} 

else 

{ 



if ( bitset ) 

if ( !( compressed = (void ^IHBDeclareWithlni^offset, size , bs ))) 
goto AddTraceback ; 

} 

else 

{ 

if ( !( compressed = (void *)IHBDeclare( size ) ) ) 
goto AddTraceback ; 

} 



15 } 



return compressed ; 
AddTraceback : 

return (void *)NULL ; 

} 

20 /* 

** This routine will create a compressed bitset that is bigger in every 
** dimension. 

*/ 



void *Create< 
25 int *bitset ; 
int offset ; 
int numVariations ; 
int *sizes ; 
int *allocSizes ; 

30 { 

void *compressed ; 

int byte ; 



CompressedBitSetExp^itset.offset^umVariations.sizes^locSizes) 
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int bit ; 
int size ; 
int total ; 
int i ; 
5 int index 1 ; 
int index2 ; 
int newPos ; 
int rowLength ; 

unsigned char *bs = (unsigned char *)bitset ; 
10 InitO; 
/* 

** Always calculate the allocated sizes, we are the only one who can set this. 
*/ 

if ( allocSizes[0] < = 0 ) 
15 CalculateAllocatedSizes(numVariations,sizes,allocSizes); 

size = allocSizes[0] ; 

for ( i = 1 ; i < numVariations ; i++ ) 

size *= allocSizes[i] ; 
total = sizes[0] ; 
20 for ( i = 1 ; i < numVariations ; i++ ) 

total *= sizes[i] ; 
if ( sizesfO] ! = allocSizes[0] ) 

{ 

if ( !( compressed = (void *)IHBDeclare( size ) ) ) 
25 goto AddTraceback ; 

/* 

** Set the bitset if the caller supplied us with one. 
*/ 

if ( bitset ) 
30 { 

rowLength = sizes[l] ; 

for ( i = 0 ; i < total ; i++ ) 

{ 



10 
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index 1 = i / rowLength ; 
index2 = i % rowLength ; 
byte = ( i + offset ) / 8 ; 
bit = ( i + offset ) % 8 ; 
if ( bs[byte] & setbits[bit] ) 

{ 

newPos = ( index 1 * allocSizes[l] ) + index2 
lHBSet(compressed, newPos ); 

} 



else 

{ 

15 if ( bitset ) 

{ 

if ( !( compressed = (void *)IHBDeclareWithInit(offset, size , bs ))) 
goto AddTraceback ; 

} 

20 else 
{ 

if ( !( compressed = (void *)IHBDeclare( size ) ) ) 
goto AddTraceback ; 

} 

25 } 

IHBOptimize(compressed) ; 
return compressed ; 
AddTraceback : 

return (void *)NULL ; 

30 } 

CountBitSets(char *inputFileName) 
{ 

FILE *inputFile ; 
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int i ; 
char "line ; 
long length ; 
long next ; 
5 long here ; 

if ( !(inputFile = fopen(inputFileName,"r"))) 

goto UnableToOpenFile ; 

i =0; 
while ( 1 ) 

{ 

here = ftell(inputFile); 

if ( -i == UTL_SCAN_GETS( inputFile, "\\" , &line)) 
break; 

if (UTL_STR_NCMP_NOCASE(line,"@BITSET_LENGTH:",15) != 0 
{ 
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15 



/* 

** Kludge for dbsearch old format. 
*/ 

if ( UTL_STR_NCMP_NOCASE(line,"@BITSET_START: ,, ,14) 



20 0 ) 



break; 



} 



25 



i++ ; 

length = (long ) atoi(line+15) ; 
next = here + length ; 

fseek(inputFile,next,SEEK_SET); 



} 



return i ; 
UnableToOpenFile : 
30 AddTraceback : 
return 0 ; 

} 
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/* 

**+E: 



** Function Name : WriteOutCompressedBSFileO 



** Purpose 



Function will write out a compressed bitset to disk. 



To be used by dbsearch/design programs to write out check 
points. 



** Usage : 
10 ** 

** Returns : 1 on success, 0 for failure. 



** Algorithms : None. 

15 ** 

** Revision History : 



** Author 

** == = == = 



Date 



Description 



** Fred Soltanshahi 07/25/96 Original version. 

** 

**-E: 
*/ 

25 int 

WriteOutCompressedBSFile(outputFileName,masterFileName,masterRec,programName,co 
mpressed,numVariations,sizes,allocSizes,numSelected,numInSites,bufferSize,buffer) 

char *outputFileName ; /* Name of output file */ 
char *masterFileName ; /* Name of master file */ 
30 int masterRec ; /* Which record in the master file */ 

char *programName ; /* Name of program generating this. */ 

void "compressed ; /* Compressed bitset */ 

int numVariations ; /* Num X01, X02 ... variation sites */ 



int *sizes ; 
int *allocSizes ; 
int numSelected 
int *numInSites 
5 int bufferSize ; 
int *buffer ; 

{ 

FILE *outputFile ; 
float version = VERSIONNUMBER ; 
10 long bitsetSize ; 
time__t startTime ; 
int i ; 

int numBytes ; 
long bitsetStart = 0 ; 
15 long endOfFile = 0 ; 
long length ; 

long beginingOfFile = 0 ; 
char *dir ; 
char *base ; 
20 char *fullPathName ; 

/* 

** Calcualte the total size of the bitset 
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/* Actuall sizes in each dimension */ 
/* Allocated sizes(bitset size) in each dimension */ 
/* Number of products selected */ 
/* Number of selections in each Y J)? site */ 
/* Program specific buffer size */ 
/* Arbitrary data written by the program */ 



25 



bitsetSize = sizes[0] ; 

for ( i = 1 ; i < numVariations ; i++ ) 

bitsetSize = bitsetSize * sizes[i] ; 
if ( !(outputFile = fopen(outputFileName,"w"))) 

goto UnableToOpenFile ; 



/* 



30 ** File format is : 
** Version Number 
** Date/Time Stamp 

** "Location of the master file" "Record Number in the file". 



10 



15 
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** A copy of master file records(one line at a time) 
** Number of Variation sites 
** Actual(current) number of choices for each site. 
** Allocated number of choices for each site. 

** Source - program that generated this file.i.e. dbsearch, dbcsln_de S) etc 

- program command line parameters - ASCII representation of parameters 

- Program specific info - ASCII line of info specific to program. 
** Number of products selected. 

- Number of selections in each dimension - num_Y_01 .choices num_y_02_choices 
** Bitset size -- The compressed bitset size in bytes. 
** Bitset -- Row major bitset of products selected. 
*/ 

/* fprintf(outputFile,"@NEXT_SLOT:%0101d\n»,endOfFile)-/ 
beginingOfFile = ftell(outputFile); 

fprintf(outputFile,"@BITSET_LENGTH:%0101d\n",bitsetStart); 
fprintfCoutputFile.^VERSION^ofxn-.version); 
time( &startTime ); 

fprintf(outputFile, " % s" ,ctime(&startTime)); 
dir = GetFullPathName(masterFileName); 
20 base = basename(masterFileName,NULL); 

fullPathName = UTL_FILE_ADD_DIR_TO_DIRSPEC(dir,base); 
fprintf(outputFile,"%s %d\n",fullPathName,masterRec); 
UTL_MEM_FREE(fullPathName) ; 

if ( !DumpMasterFileInfo(outputFile,masterFileName,masterRec)) 
25 goto UnableToDumpMaster ; 

fprintf(outputFile, " %d\n" ,numVariations); 
for ( i = 0 ; i < num Variations ; i++ ) 
fprintf(outputFile," %d " ,sizes[i]); 
fprintf (outpu tFile , " \n " ) ; 

30 if ( allocSizes[0] = = - 1 ) 

CalculateAllocatedSizes(numVariations,sizes,allocSizes); 

for ( i = 0 ; i < numVariations ; i + + ) 

fprintf(outputFile,"%d ",allocSizes[i]); 
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fprintf(outputFile, "\n"); 
fprintf(outputFile, " %s\n" ,programName); 
fprintf(outputFile, " %d\n" ,numSelected) ; 
for ( i = 0 ; i < num Variations ; i++ ) 
5 fprintf(outputFile, " %d " ,numInSites[i]); 

fprintf(outputFile, " \n ") ; 

/* 

** Now the product bitset. 
*/ 

10 bitsetStart = ftell(outputFile); 

fprintf(outputFile,"@PRODUCT_BITSET\n"); 

IHBDump(compressed,TestDump,outputFile); 

fprintfCoutputFile/^PROGRAMJNFOXn"); 

fprintf(outputFile," %d\n" ,bufferSize); 
15 if ( buffer ) 

fwrite(buffer,bufferSize, 1 ,outputFile); 

endOfFile = ftell(outputFile); 

length = endOfFile - beginingOfFile ; 

fseek(outputFile,beginingOfFile,SEEK_SET); 
20 fprintf(outputFile,"@BITSET_LENGTH: %0101d\n" .length); 

/* 

** Go back to the begining and write out the header info for the file 
*/ 

rewind(outputFile); 
25 /* fprintf(outputFile,"@NEXT_SLOT:%0101d\n",endOfFile);*/ 

fclose(outputFile); 
return 1 ; 
UnableToDumpMaster : 

fprintf(stderr,"WriteOutCompressedBSFile()-Unable to dump master file 

30 % s\n " , masterFileName) ; 

goto AddTraceback ; 
UnableToOpenFile : 

fprintf(stderr,"WriteOutCompressedBSFile0--Unable to open 
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%s\n" .outputFileName); 

goto AddTraceback ; 

UnableToWriteToFile : 

fprintf(stderr,"WriteOutCompressedBSFileO--Unable to write to output file\n"); 

5 goto AddTraceback ; 

UnableToCreateBitSet : 

fprintf(stderr,"Writ e OutCompressedBSFileO--Unable to create compressed 

bitset\n"); 

goto AddTraceback ; 

10 AddTraceback : 
return 0 ; 

} 

/* 

**+E: 

15 ** 
** 

** Function Name : WriteOutCheckPointFile() 
** 

** Purpose : Function will write out a check point(bitset file to 
20 ** the given file. 

#* 

** Usage : To be used by dbsearch/design programs to write out check 

** points. 

** 

25 ** Returns : 1 on success, 0 for failure. 
** 

** Algorithms : None. 
** 

** Revision History : 
30 ** 

** Author Date Description 
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** Fred Soltanshahi 07/25/96 Original version. 

** 

**-E: 
5 */ 
int 

WriteOutCheckPointFile(outputFileName,masterFileName,masterRec,programName,bitSet, 
offset,numVariations,sizes,allocSizes,numSelected,numInSites,bufferSize,buffer) 

char *outputFileName ; /* Name of output file */ 
10 char *masterFileName ; /* Name of master file */ 

int masterRec ; /* Which record in the master file */ 

char *programName ; /* Name of program generating this. */ 

int *bitSet ; /* Actual bitset */ 

int offset ; /* Offset into the bitset */ 

15 int numVariations ; /* Num X01, X02 ... variation sites */ 

int *sizes ; /* Actuall sizes in each dimension */ 

int *allocSizes ; /* Allocated sizes(bitset size) in each dimension */ 

int numSelected ; /* Number of products selected */ 

int *numInSites ; /* Number of selections in each Y_0? site */ 
20 int bufferSize ; /* Program specific buffer size */ 

int ""buffer ; /* Arbitrary data written by the program */ 

{ 

void ^compressed ; 
/* 

25 ** Created a compressed bitset. 
*/ 

if ( ! (compressed = CreateCompressedBitSet(bitSet, 

offset, 



30 numVariations, 



sizes, 



allocSizes) ) ) 



10 
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goto UnableToCreateBitSet ; 
if ( !WriteOutCompressedBSFile(outputFileName, 

masterFileName, 
masterRec, 

programName, 
compressed, 
num Variations, 
sizes, 
allocS izes, 
numSelected, 
numlnSites, 
bufferSize, 
buffer)) 

goto UnableToWriteToFile ; 
15 IHBDestroy (compressed); 

return 1 ; 

UnableToWriteToFile : 

fprintfCstderr/'WriteOutCheckPointFileO-Unable to write to output file\n M ); 

goto AddTraceback ; 

20 UnableToCreateBitSet : 

fprintf(stderr, n WriteOutCheckPointFile0--Unable to create compressed bitsetW); 

goto AddTraceback ; 
AddTraceback : 
return 0 ; 

25 } 

static int TestRestore(DumpCode, Parameter, Dumplnt) 
int DumpCode; 
void *Parameter; 
int *DumpInt; 
30 { 

FILE *fp = (FILE *)Parameter ; 
fscanf(fp, "%d\n", Dumplnt); 
return 1 ; 
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} 

static int GetReagentInfo(char *fileName,char **reagentInfo) 
{ 

FILE *fp ; 
5 char buffer[4096] ; 
int i = 0 ; 
char *cp ; 

if ( !(fp = fopen(fileName,"r")) ) 

return 0 ; 

1 0 while ( fgets(buf fer , sizeof(buffer)- 1 , fp)) 

{ 

buffer[strlen(buffer)-l] = 0 ; 

if ( ( cp = strstr(buffer,"# USER_NAME=")) ) 

{ 

15 (*reagentInfo) = UTL_STR_SAVE(buffer+12); 

return 1; 

} 

/* 

** Only read the first 10 lines. 
20 */ 

if(i+ + > 10) 
break; 

} 

/* 

25 ** we did not find the reagent info, lets save an empty string and 
** assume the file is old and does not contain the info. 
*/ 

reagentlnfo = UTL_STR_SAVE(""); 
return 1 ; 

30 } 
/* 
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** Function Name : ReadCheckPointFileO 
** 

5 ** Purpose 



** Usage 
#* 

10 ** 



** Returns 



: Function will write out a check point(bitset file to 
the given file. 



To be used by dbsearch/design programs to write out check 
points. 



: 1 on success, 0 for failure. 
** Algorithms : None. 



15 ** Revision History 
** Author 



20 ** Fred Soltanshahi 



Date 



Description 



07/25/96 Original version. 



*/ 

static int 

25 ReadCheckPointFile(inputFileName,inputOffset,masterFileName,masterRec,programName, 
bitSet,numVariations,sizes,allocSizes,numSelected,numInSites,masterInfo,bufferSize,progBu 

ff) 

char *inputFileName ; /* Name of input file */ 

int inputOffset ; /* Offset into the input file where the bitset starts*/ 
30 char **masterFileName ; /* Name of master file */ 

int *masterRec ; /* Which record in the master file */ 
char **programName ; /* Name of program generating this. */ 
int **bitSet ; /* Actual bitset */ 
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int "numVariations ; /* Num X01, X02 ... variation sites */ 
int **sizes ; /* Actuall sizes in each dimension */ 

int **allocSizes ; /* Allocated sizes(bitset size) in each dimension */ 
int *numSelected ; /* Number of products selected */ 
5 int **numInSites ; /* Number of selections in each Y_0? site */ 
struct MasterFUeStruct *masterInfo ; 
int *bufferSize ; /* program specific buffer size */ 
int **progBuff ; /* Program specific buffer */ 

{ 

10 FILE *inputFile ; 

float version ; 

long bitsetSize ; 

time_t startTime ; 

int i ; 
15 int numBytes ; 

char buffer[4096] ; 

char hold[81] ; 

char *cp ; 

if ( !(inputFile = fopen(inputFileName,"r"))) 
20 goto UnableToOpenFile ; 

/* 

** File format is : 

** @BITSET_START:Where In the File The bitset starts 
** ©VERSION: Version Number: currentVersion Number 

25 ** Date/Time Stamp 

** "Location of the master file" "Record Number in the file". 

** ©MASTER NumberOfLines : Number of lines used for the master file. 

** A copy of master file records(one line at a time,currently 11 lines ) 

** Number of Variation sites 
30 ** Actual(current) number of choices for each site. 

** Allocated number of choices for each site. 

** Source -- program that generated this file.i.e. dbsearch, dbcsln_des,etc 
** Number of products selected. 
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** Number of selections in each dimension - num_Y„01 .choices num_y_02_cho 1C es . 
** @PRODUCT_BITSET 

** Bitset -- Row major bitset of products selected. 
** ©PROGRAM JNFO 
5 ** size of program specific buffer 
** program specific buffer 
*/ 
/* 

** BITSET_START: 
10 */ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 
goto UnableToReadFile ; 

/* 

** VERSION. 
15 */ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
cp = strstr(buffer,"@VERSION:"); 

if ( !cp ) 

goto VersionMissing ; 

version = atof(buffer); 
if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
sscanf(buffer," %s %d",hold,masterRec); 
(*masterFileName) = UTL_STR_SAVE(hold); 
if ( masterlnfo ) 

{ masterlnfo- > masterFilePathName = UTL_STR_SAVE(hold); 
masterlnfo- >masterRecNo = *masterRec ; 



20 



25 



30 



/* 



©MASTER 11 thing. 
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*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 
goto UnableToReadFile ; 

/* 

5 ** Reaction class 
*/ 

if ( »fgets(buffer,sizeof(buffer)-l,inputFile)) 
goto UnableToReadFile ; 

/* 

10 ** Prefix 
*/ 

if ( !f ge ts(buffer,sizeof (buffer)- l.inputFile)) 

goto UnableToReadFile ; 
buffer[strlen(buffer)-l] = 0 ; 
15 masterInfo->prefixForFiles = 

UTL STR_SAVE(UTL_FILE_PARSE(buffer ,4)) ; 

/* 

** number of sites 
*/ 

20 if ( ! fgets(buffer ,sizeof(buffer)- 1 .inputFile)) 

goto UnableToReadFile ; 
masterInfo->numVariationSites = atoi(buffer); 

/* 

** missing bits 
25 */ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
masterInfo->numberOfMissingBits = atoi(buffer); 

/* 

30 ** Lbits 
*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 
goto UnableToReadFile ; 
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masterlnfo- > lbits = atoi(buffer); 

/* 

** core file 
*/ 

5 if ( ! fgets(buffer,sizeof(buffer)- 1 ,inputFile)) 

goto UnableToReadFile ; 
buffer[strlen(buffer)-l] = 0 ; 

masterlnfo- > corefilePathName = UTL_STR_SAVE(buffer); 

/* 

10 ** Start core 
*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile» 

goto UnableToReadFile ; 
masterlnfo- > startCore = atoi (buffer); 

15 /* 

** fingure print file name 

*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 
goto UnableToReadFile ; 
20 buffer[strlen(buffer)-l] = 0 ; 

masterlnfo- > fingerFileName = UTL_STR_S A VE(buffer) ; 

/* 

** start fingure print 
*/ 

25 if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
masterlnfo- > fingerOffset = atoi(buffer); 

if (! (masterlnfo- >x_FileName = (char **)UTL_MEM_C ALLOC ( 

masterlnfo- > numVariationSites, 
3Q sizeof(char *))) ) 

goto AddTraceback ; 
if (! (masterlnfo- >reagentInfo = (char **)UTL_MEM_C ALLOC ( 

masterlnfo- > numVariationSites, 
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sizeof(char *))) ) 

goto AddTraceback ; 

/* 

** XI file pathname 
5 */ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
buffer[strlen(buffer)-l] = 0 ; 

masterInfo->x_FileName[0] = UTL_STR_SAVE(buffer); 
10 if ( !GetReagentInfo(buffer,&masterInfo->reagentInfo[0])) 

goto UnableToReadReagentlnfoO ; 

/* 

** X2 file pathname 
*/ 

15 if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
buffer[strlen(buffer)-l] = 0 ; 

masterInfo->x_FileName[l] = UTL_STR_SAVE(buffer); 
if ( !GetReagentInfo(buffer,&masterInfo->reagentInfo[l])) 
20 goto UnableToReadReagentlnfol; 

} 

else 

{ 

/* 

25 ** just skip the 12 lines. 
*/ 

for ( i = 0 ; i < 12 ; i++ ) 
{ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 
30 goto UnableToReadFile ; 

} 

} 

if ( !fgets(buffer,sizeof(buffer)-l,inputFi!e)) 



542 

goto UnableToReadFile ; 
*num Variations = atoi(buffer); 
if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 

5 /* 

** We have to allocate the arrays for sizes, allocSizes and fragSizes . 
*/ 

if ( !( (*sizes) = (int *)UTL_MEM_CALLOC(*numVariations, 

sizeof(int)) 

10 )) 

goto AddTraceback ; 
if ( !( (*allocSizes) = (int *)UTL_MEM_CALLOC(*num Variations, 

sizeof(int)) 

)) 

15 goto AddTraceback ; 

if ( !( (*numInSites) = (int *)UTL_MEM_CALLOC(*num Variations, 

sizeof(int)) 

)) 

goto AddTraceback ; 
20 cp = buffer ; 

for ( i = 0 ; i < *num Variations ; i++ ) 

{ 

sscanf(cp," %d" ,&((*sizes)[i])); 
cp = strstr(cp," "); 

25 } 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
cp = buffer ; 

for ( i = 0 ; i < *num Variations ; i + + ) 
30 { 

sscanf(cp, " %d" ,&((*allocSizes)[i])) ; 
cp = strstr(cp," "); 

} 
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if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 

buffer[strlen(buffer)-l] = 0 ; 
(*programName) = UTL_STR_S A VE(buffer) ; 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
*numSelected = atoi(buffer); 
if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 

cp = buffer ; 

for ( i = 0 ; i < *num Variations ; i + + ) 

{ 

sscanf(cp," %d" ,&((*numInSites)[i])); 
cp = strstr(cp," "); 

} 

/* 

** @PRODUCT_BITSET 
*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
IHBRestore(bitSet,TestRestore,inputFile); 

/* 

** @PROGRAM_INFO 
*/ 

if ( !fgets(buffer,sizeof(buffer)-l,inputFile)) 

goto UnableToReadFile ; 
fclose(inputFile); 
return 1 ; 
VersionMissing : 

fprintf(stderr,"ReadCheckPointFile()--File %s is not a valid ChemSpace 

file\n" ,inputFileName); 

goto AddTraceback ; 
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UnableToReadFile : 

fprintf(stderr,"ReadCheckPointFile()-Unable to read from file\n"); 

goto AddTraceback ; 

UnableToDumpMaster : 
5 fp ri ntf(stderr,"ReadCheckPointFileO--Unable to dump master file 

% s\n " , masterFileName) ; 

goto AddTraceback ; 
UnableToReadReagentlnfoO : 

fprintf(stderr,"ReadCheckPointFileO--Unable to read reagent info in %s\n», 

10 masterlnfo- > x_FileName[0]) ; 
UnableToReadReagentlnfol : 

fprintf(stderr,"ReadCheckPointFile()-Unable to read reagent info in %s\n", 

masterlnfo- > x_FileName[l]); 
goto AddTraceback ; 

15 UnableToOpenFile : 

fprintf(stderr,"ReadCheckPointFile()-Unable to open %s\n",in P utFileName); 

goto AddTraceback ; 

UnableToWriteToFile : 

fprintf(stderr,"ReadCheckPointFile()-Unable to write to output file\n"); 

20 goto AddTraceback ; 

AddTraceback : 
return 0 ; 

} 

static struct BitSetFileStruct *ReadAndAllocateMaster(char *masterFileName,int 
25 masterRecNumber,int *initBitset) 
{ 

struct BitSetFileStruct *bitset ; 

FILE *mfFP ; 

/* 

30 ** First allocate everything we need. 
*/ 

if ( !(bitset = (struct BitSetFileStruct *)UTL_MEM_CALLOC(l, 
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sizeof(struct BitSetFileStruct )))) 
goto AddTraceback ; 

bitset- > masterFilelnfo.masterFilePathName = UTL_STR_S AVE(masterFileName); 
bitset->masterFileInfo.masterRecNo = masterRecNumber; 

5 /* 

** Some if this info is fixed for now. 
*/ 

bitset- >programInfo.programName = (char *)NULL ; 

bitset->masterFileInfo.numVariationSites = bitset- > numVariationSites = 2 ; 

10 bitset- > totalSelected = 0 ; 

if (!(bitset->masterFileInfo.x_FileName = (char -)UTL_MEM_CALLOC( 

bitset- > numVariationSites, 
sizeof(char *))) ) 

goto AddTraceback ; 
15 if (!(bitset->actuallSizes = (int *)UTL„MEM_CALLOC( 

bitset- > numVariationSites, 
sizeof(int ))) ) 

goto AddTraceback ; 
if (! (bitset- >allocSizes = (int *)UTL_MEM_CALLOC( 
2Q bitset- > numVariationSites, 

sizeof(int ))) ) 

goto AddTraceback ; 
if (! (bitset- >numFragsInEachSite = (int *)UTL_MEM_CALLOC( 

bitset- > numVariationSites, 
2 5 sizeof(int ))) ) 

goto AddTraceback ; 
if ( !RetrieveMasterFile(masterFileName, 

mfFP, 

masterRecNumber, 



30 &(bitset- > masterFilelnfo.numberOfMissingBits), 

&(bitset- > masterFilelnfo.lbits), 
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&(bitset- > masterFilelnfo.corefilePathName) , 

&(bitset- > masterFilelnfo.startCore), 

&(bitset- > masterFilelnfo.fingerFileName), 
&(bitset- > masterFilelnfo. x_FileName[0]), 
5 &(bitset- > masterFilelnfo. x_FileName[l]), 

&(bitset- > actuallSizes[0]), 
&(bitset- > actuallSizes[l]), 
NULL, 

&(bitset- > masterFilelnfo.fingerOffset), 

[0 NULL, 

NULL, 
NULL, 
NULL, 
NULL, 

15 NULL)) 
goto AddTraceback ; 
if ( !( bitset- > bitset = CreateCompressedBitSet(initBitset, 

0, 

bitset- > numVariationSites, 
2Q bitset- >actuallSizes, 

bitset- >allocSizes)) ) 

goto AddTraceback ; 
return bitset ; 
AddTraceback : 

25 fprintf(stderr,"ReadAndAllocateMasterO--Unable to read master file\n"); 

return (struct BitSetFileStruct *) NULL ; 

} 



static int CalculateFragsInSties( struct BitSetFileStruct *bitset ) 
{ 
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unsigned char **bitArray ; 
int i ; 
intj ; 
int size ; 
5 int address ; 
int *indxs = 0 ; 
int what ; 
int this ; 

InitO; 

10 if ( !( bit Array = (unsigned char 

**)UTL MEM_C ALLOC (bitset- > numVariationSites, 



sizeof(unsigned char *))) ) 

goto AddTraceback ; 
5 for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 

{ 

size = ( bitset- > actuallSizes[i] + 7 ) / 8 ; 
if ( !(bitArray[i] = (unsigned char 
*)UTL_MEM_CALLOC(size,sizeof (unsigned char)))) 
IQ goto AddTraceback ; 

} 

for ( address = -1 , i = 0 ; i < bitset- > totalSelected ; i++ ) 
{ 

address = IHBFindNextOne(bitset->bitset,address+ 1); 
25 BitSet AddressToIndexes(bi tset , address , &indxs ,0) ; 

/* 

** For every hit and every variation site address set the bit to 1. 
*/ 

for ( j = 0 ; j < bitset- > numVariationSites ; j + + ) 
30 { 

what = indxs[j] % 8; 
this = indxs[j] / 8; 
bitArray[j][this] j = setbits[what]; 
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} 

} 

for ( i = 0 ; i < bitset- > numVariationSites ; i + + ) 
{ 

size = ( bitset- > actuallSizes[i] + 7 ) / 8 ; 
bitset- > numFragsInEachSite[i] = 0 ; 

for ( j = 0 ; j < size ; j + + ) 

bitset- >numFragsInEachSite[i] += nbits[bitArray[i]D] & 255]; 



} 

/* 

** Get read of memory we allocated for calculating the hits the last ti 
*/ 

if ( bitset- > fragmentBitset ) 
{ 

for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 
UTL_MEM_FREE(bitset- > fragmentBitset[i]); 
UTL_MEM_FREE(bitset- > fragmentBitset ) ; 

} 

bitset- > fragmentBitset = bitArray ; 
UTL_MEM_FREE(indxs) ; 

♦ 

return 1 ; 
AddTraceback : 
return 0 ; 

} 



static int GetPartialProductsAddresses( S truct BitSetFileStruct *bitset,int numFixedSites, int 

*fixedSitesIndexes, int site, int **hitlndexes) 

{ 

int i ; 
intj ; 
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int k ; 
int address ; 
int *indxs = 0 ; 
int skiplt ; 
5 int numHits = 0 ; 

unsigned char **bitArray ; 
int size ; 
int what ; 
int this ; 

10 InitO; 

if ( !( bitArray = (unsigned char 
**)UTL_MEM_CALLOC(bitset- > numVariationSites, 

sizeof(unsigned char *))) ) 
15 goto AddTraceback ; 

for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 

{ 

1* We only want to count the fragments for the site that is being exploded. 
20 */ 

if ((site != -1 )&&(i!= site) ) 
continue ; 

size = ( bitset- > actuallSizes[i] + 7 ) / 8 ; 
if ( ! (bit Array [i] = (unsigned char 
25 *)UTL_MEM_CALLOC(size,sizeof(unsigned char)))) 

goto AddTraceback ; 

for ( address = -1 , i = 0 ; i < bitset- > totalSelected ; i+ + ) 

3Q { addr ess = IHBFindNextOne(bitset->bitset,address+l); 

BitSetAddressToIndexes(bitset,address,&indxs ( 0); 

/* 

** The sites that have already been expanded will constraint what hits 
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** we find. 
*/ 

if ( numFixedSites ) 
{ 

5 skiplt = 0 ; 

for ( k = 0 ; k < bitset- > numVariationSites ; k++ ) 

{ 

if ( fixedSitesIndexes[k] == -1 ) 
continue ; 

10 /* 

** our hit index matches our constraint index. 
*/ 

if ( fixedSitesIndexes[k] ! = indxsfk] ) 
{ 

15 skiplt = 1 ; 

break; 

} 

} 

if ( skiplt ) 
20 continue ; 

} 

numHits+4- ; 

for ( j = 0 ; j < bitset- > numVariationSites ; j + + ) 
{ 

25 if ( ( site ! = -1 ) && ( j ! = site ) ) 

continue ; 
what = indxs[j] % 8; 
this = indxs[j] / 8; 
bitArray[j][this] ! = setbits[what]; 

30 } 
} 

#if 0 
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** Figure out how many hits there are for this site. 
*/ 

for ( k = 0 ; k < bitset- > numVariationSites ; k+ + ) 
{ 

5 if ( site ! = k ) 

continue ; 

size = ( bitset- >actuallSizes[k] + 7 ) / 8 ; 
numFragmentsPerSite[k] = 0 ; 
for ( j = 0 ; j < size ; j + + ) 
10 numFragmentsPerSitefk] += nbits[bitArray[k]|j] & 255]; 

} 

#endif 
/* 

** Now get the indexes for all the hits. 
15 */ 

if ( !( (*hitlndexes) = (int *)UTL_MEM_CALLOC(numHits, 

sizeof(int))) 

) 

goto AddTraceback ; 

20 numHits = 0 ; 

for ( k = 0 ; k < bitset- > numVariationSites ; k++ ) 

{ 

if ( site = = -1 ) 
{ 

25 if ( fixedSitesIndexes[k] ! = -1 ) 

continue ; 

} 

else 

{ 

30 if ( site ! = k ) 

continue ; 

} 

size = ( bitset- >actuallSizes[k] + 7 ) / 8 ; 



10 
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for ( i = 0 ; i < size ; i++ ) 
{ 

if ( bitArray[k][i] ) 
{ 

5 /* 

** If any bit is set in the byte then we need to figure out what the hits are. 
*/ 

for(j =0; j < 8 ;j + + ) 
{ 

if ( bitArray[k][i] & setbitsG] ) 
{ 

(*hitIndexes)[numHits++] = i * 8 + j ; 

} 

} 

15 } 

} 

} 

for ( i = 0 ; i < bitset- > numVariationSites ; i + + ) 

if ( bitArray[i] ) 
20 UTL_MEM_FREE(bitArray[i]); 

UTL_MEM_FREE(bit Array ); 

return numHits ; 

AddTraceback : 
return 0 ; 

25 } 



static int GetPartialProductsStats( struct BitSetFileStruct *bitset , int numFixedSites, 
*fixedSitesIndexes, int *numProducts, int *numFragmentsPerSite) 

{ 

int i ; 
30 intj ; 
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int k ; 
int address ; 
int *indxs = 0 ; 
int skiplt ; 
5 int numHits = 0 ; 

unsigned char **bitArray ; 

int size ; 
int what ; 
int this ; 

10 InitO; 

if ( !( bit Array = (unsigned char 
**)TJTL_MEM_C ALLOC (bitset- > numVariationSites, 

sizeof(unsigned char *))) ) 
{5 goto AddTraceback ; 

for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 

{ 

'I We only want to count the fragments for the sites that are not being 
20 ** exploded. 
*/ 

if ( fixedSitesIndexes[i] != -1 ) 
continue ; 

size = ( bitset- >actuallSizes[il + 7 ) / 8 ; 
25 if ( !(bitArray[i] = (unsigned char 

*)UTL_MEM_CALLOC(size,sizeof(unsigned char)))) 
goto AddTraceback ; 

L ( address = -1 , i - 0 ; i < bitset- > totals elected ; i+ + ) 

address = IHBFindNextOne(bitset->bitset,address+l), 
BitSetAddressToIndexes(bitset,address,&indxs,0); 



15 
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** The sites that have already been expanded will constraint what hits 

** we find. 

*/ 

if ( numFixedSites ) 
5 { 

skipit = 0 ; 

for ( k = 0 ; k < bitset- > numVariationSites ; k++ ) 
{ 

if ( fixedSitesIndexes[k] = = -1 ) 
1Q continue ; 

/* 

** our hit index matches our constraint index. 
*/ 

if ( fixedSitesIndexes[k] != indxs[k] ) 
{ 

skipit = 1 ; 
break; 

} 

} 

20 if ( skipit ) 

continue ; 

} 

numHits++ ; 

for ( j = 0 ; j < bitset- > numVariationSites ; j + + ) 
25 { 

if ( fixedSitesIndexesU] != -1 ) 

continue ; 
what = indxs[j] % 8; 
this = indxsft] / 8; 
30 bitArray[j][this] | = setbits[what]; 

} 

} 

for ( k = 0 ; k < bitset- > numVariationSites ; k++ ) 
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{ 

if ( fixedSitesIndexes[k] != -1 ) 
continue ; 

size = ( bitset- > actuallSizes[k] + 7 ) / 8 ; 
5 numFragmentsPerSite[k] = 0 ; 

for (j = 0 ; j < size ; j + + ) 

numFragmentsPerSite[k] += nbits[bitArray[k][j] & 255]; 



} 

*numProducts = numHits ; 
IQ for ( i = 0 ; i < bitset- > numVariationSites ; i+ + ) 

if ( bitArray[i] ) 

UTL_MEM_FREE(bitArray[i]); 

UTL_MEM_FREE(bit Array ); 

return numHits ; 

15 AddTraceback : 
return 0 ; 

} 

static int GetPartialProducts( struct BitSetFileStruct "bitset , int numFixedSites, 
*fixedSitesIndexes, int whichSite, int **sitelndexes) 

20 { 

int i ; 
intj ; 
int k ; 

int address ; 
25 int *indxs = 0 ; 
int skiplt ; 
int numHits = 0 ; 
InitO; 

for ( address = -1 , i = 0 ; i < bitset- > totalSelected ; i++ ) 
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{ 

address = IHBFindNextOne(bitset->bitset,address+l); 
BitSet AddressToIndexes(bitset,address , &indxs ,0) ; 

/* 

5 ** The sites that have already been expanded will constraint what hits 
** we find. 
*/ 

if ( numFixedSites ) 
{ 

10 skiplt = 0 ; 

for ( k = 0 ; k < bitset- > numVariationSites ; k+ + ) 

{ 

if ( fixedSitesIndexes[k] == -1 ) 
continue ; 

15 /* 

** our hit index matches our constraint index. 
*/ 

if ( fixedSitesIndexes[k] ! = indxs[k] ) 
{ 

20 skiplt = 1 ; 

break; 

} 

} 

if ( skiplt ) 
25 continue ; 

} 

numHits+ + ; 

fprintf(stderr,"Gotahiton %d %d %d\n",address,indxs[0].indxs[l]); 
} 

30 return numHits ; 



AddTraceback : 
return 0 ; 
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} 

static GetFragmentsUsedlnASiteC struct BitSetFileStruct -bitset , int whichSite , int " 
indxs) 

{ 

5 unsigned char *bitArray ; 

int i ; 

intj ; 

int size ; 

int *address ; 
10 int numHits = 0 ; 

int bit ; 

if ('.(address = (int 
WTL MEM C ALLOC(bitset->numFragsInEachSite[whichSite], 
' ~ ~ sizeof(int))) ) 

15 goto AddTraceback ; 

/* 

** Figure out how many ints there are in this bitset. 



*/ 

„«, = ( bitset- >actuallSizes[whichSite] + 7 ) / 8 ; 

fo7( bitArray = bitset- > fragmentBitset[whichSite] , i = 0 ; i < size ; i+ + ) 



size 

20 

{ 

if ( bitArray[i] ) 
{ 

25 1* If any bit is set in the byte then we need to figure out what the hits are 
*/ 

for (j = 0 ; j < 8 ; j + + ) 
{ 

if ( bitArray[i] & setbits[j] ) 

30 * . 

address[numHits+ +] = i * » + J , 

} 
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} 

} 

} 

(*indxs) = address ; 
5 return numHits ; 

AddTraceback : 
return 0 ; 

} 



static struct 



BitSetFileStruct *ReadAndAllocate(char *fileName ,int offset ) 



10 { 



struct BitSetFileStruct *bitset ; 

if ( !(bitset = (struct BitSetFileStruct *)UTL_MEM_CALLOC(l, 
sizeof(struct BitSetFileStruct )))) 
goto AddTraceback ; 
15 if ( !ReadCheckPointFile(fileName, 



&(bitset- > masterFilelnfo.masterFilePathName) , 



offset, 



&(bitset- > masterFilelnfo.masterRecNo), 



20 



25 



&(bitset- > programlnfo.programName), 

&(bitset->bitset), 

&(bitset- > numVariationSites), 

&(bitset- > actuallSizes), 

&(bitset- > allocSizes), 

&(bitset- > totalSelected), 

&(bitset- > numFragsInEachSite), 

&(bitset- > masterFilelnfo), 

&(bitset- > programlnfo.bufferSize) , 

NULL)) 



goto AddTraceback ; 
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return bitset ; 
AddTraceback : 

return ( struct BitSetFileStruct *)NULL ; 

} 

5 static int ReadBitsetCoreInfo(void *bs, char **masterFileName, int *masterRecno, char 
**core, char **xrString, int "numSites, char ***xFileNames ) 
{ 

struct BitSetFileStruct *bitset = (struct BitSetFileStruct *) bs; 
int recNo ; 
10 FILE *fp ; 
int i ; 

int found = 0 ; 
char *line ; 
char *cp ; 
15 char *cpl ; 

*numSites = bitset- > numVariationSites ; 

*masterFileName = bitset- >masterFileInfo.masterFilePathName; 

*masterRecno = bitset- > masterFilelnfo.masterRecNo; 

if ( !((*xFileNames) = (char **)UTL_MEM_CALLOC(*numSites, 

20 sizeof(char *)) )) 

goto AddTraceback ; 
for ( i = 0 ; i < *numSites ; i++ ) 
(*xFileNames)[i] = 
UTL STR_SAVE(bitset-> masterFileInfo.x_FileName[i]); 

25 /* 

** Open the core file and read in the core and parse out the XRstring. 
*/ 

if ( !(fp = fopen(bitset->masterFileInfo.corefilePathName,"r")) ) 

goto UnableToReadCore ; 

30 recNo = 0 ; 

found = 0 ; 
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while ( -1 != UTL_SCAN_GETS( fp, "W", &line)) 
{ 

recNo++ ; 

if ( recNo - = bitset- > masterFilelnfo. startCore ) 
{ 

found — 1 ; 
break; 

} 

} 

if (! found ) 

goto UnableToReadCore ; 

/* 

** Replace all occurences of Y_0x with Xx. 
*/ 

(*core ) = UTL_STR_S A VE(li ne) ; 
cp = strstr(line,"XRLIST="); 
if ( !cp ) 

(*xrString) = UTL_STR_SAVE(""); 

else 
{ 

/* 

** Skip the first double quote. 
*/ 

cp + = 8 ; 

/* 

** Go find the end of double quotes. 
*/ 

cpl = cp ; 

while ( (*cp) != "") 

cp++ ; 
*cp = 0 ; 

(*xrString) - UTL_STR_SAVE(cpl); 

} 
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fclose(fp); 

return 1 ; 
UnableToReadCore : 

fprintf(stderr,"ReadBitsetCoreInfoO - Unable to read core %s %d\n", 
5 bitset- > masterFilelnfo.corefilePathName, 

bitset- > masterFilelnfo.startCore); 
AddTraceback : 

fprintf(stderr,"ReadBitsetCoreInfoO - Unable to read core info\n"); 
return 0 ; 

10 } 



static int ReadMasterCoreInfo(char *masterFile, int index, char **core, char **xrString, 
int *numSites, char ***xFileNames ) 

{ 

int recNo ; 
15 FILE *fp ; 
int i ; 

int found = 0 ; 

char *line ; 

char *cp ; 
20 char *cpl ; 

char *prefix = (char *)NULL ; 

char *coreFile — (char *)NULL ; 

char *fpFileName = (char *)NULL ; 

int fpOffset ; 
25 int mBits ; 

int IBits ; 

int startCore ; 

*numSites = 2 ; /* fixed for now */ 

if ( !((*xFileNames) = (char **)UTL_MEM_CALLOC(*numSites, 



30 sizeof(char *)) )) 
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goto AddTraceback ; 

Get the master file info. 

if ( !GetMasterRecordHeader(masterFile, 

index, 

&prefix, 

&mBits, 

&lBits, 

&coreFile, 

&startCore, 

&(*xFileNames)[0], 

&(*xFileNames)[l], 

numSites, 

&fpFileName, 

&fpOffset)) 

goto AddTraceback ; 

* Open the core file and read in the core and parse out the XRstring. 
/ 

if ( i(fp = f 0 pen(coreFile,"r")) ) 
goto UnableToReadCore ; 
recNo = 0 ; 
found = 0 ; 

while ( -1 != UTL_SCAN_GETS( fp, "W", &line)) 
{ 

recNo+ + ; 

if ( recNo = = startCore ) 
{ 

found = 1 ; 
break; 

} 
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if ('.found ) 

goto UnableToReadCore ; 
(*core ) = UTL_STR_S A VE(line) ; 
cp = strstr(line, M XRLIST="); 

5 if ( !cp ) 

(*xrString) = UTL_STR_SAVE(""); 

else 
{ 

/* 

10 ** Skip the first double quote. 
*/ 

cp + = 8 ; 

/* 

** Go find the end of double quotes. 
15 */ 

cpl = cp ; 

while ( (*cp) != "") 

cp++ ; 
*cp = 0 ; 

20 (*xrString) = UTL_STR_SAVE(cpl); 

} 



fclose(fp); 

if ( coreFile ) 

UTL_MEM_FREE(coreFile) ; 

25 if ( fpFileName ) 

UTL_MEM_FREE(fpFileName) ; 

if ( prefix ) 

UTL_MEM_FREE(prefix) ; 

return 1 ; 

30 UnableToReadCore : 

fprintf(stderr,"ReadMastersetCoreInfo() -- Unable to read 

coreFile.startCore); 
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AddTraceback : 

fprintf(stderr,"ReadMastersetCoreInfoO ~ Unable to read core info\n"); 
if ( coreFile ) 

UTLMEMFREE(coreFile) ; 
5 if ( fpFileName ) 

UTL_MEM_FREE(fpFileName) ; 

if ( prefix ) 

UTL_MEM_FREE(prefix) ; 
return 0 ; 

10 } 

static void DeallocateBitset( struct BitSetFileStruct *bitset ) 
{ 

int i ; 

if ( bitset->masterFileInfo.masterFilePathName ) 
15 UTL_MEM_FREE(bitset- > masterFilelnfo.masterFilePathName); 

if ( bitset->masterFileInfo.corefilePathName ) 

UTL_MEM_FREE(bitset- > masterFilelnfoxorefilePathName); 
if ( bitset->masterFileInfo.fingerFileName ) 

UTL_MEM_FREE(bitset- > masterFilelnfo.fingerFileName); 
20 if ( bitset->masterFileInfo.prefixForFiles ) 

UTL_MEM_FREE(bitset- > masterFilelnfo.prefixForFiles); 
for ( i = 0 ; i < bitset->masterFileInfo.numVariationSites ; i + + ) 

UTL_MEM_FREE(bitset- > masterFileInfo.x_FileName[i]); 
if ( bitset->masterFileInfo.x_FileName) 
25 UTL_MEM_FREE(bitset- > masterFilelnfo. x_FileName) ; 

if ( bitset->programInfo.programName ) 

UTL_MEM_FREE(bitset- > programlnfo.programName); 
if ( bitset->programInfo. buffer ) 

UTL_MEM_FREE(bitset- > programlnfo. buffer); 
30 IHBDestroy(bitset- > bitset); 

if ( bitset- >actuallSizes ) 

UTL_MEMJFREE(bitset- > actuallSizes); 
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if ( bitset->allocSizes ) 

UTL_MEM_FREE(bitset- > allocSizes); 
if ( bitset- > numFragsInEachSite ) 

UTL_MEM_FREE(bitset- > numFragsInEachSite); 
5 UTL_MEM_FREE(bitset); 

bitset = (struct BitSetFileStruct *) NULL ; 

} 

void CS_PRDCT_BITSET_DUMP( struct BitSetFileStruct *bitset ) 
{ 

10 int i ; 
int indx ; 
int indxl ; 
int indx2 ; 

fprintf(stderr, "Master file name : 
15 %s\n",bitset- > masterFilelnfo.masterFilePathName); 

fprintf(stderr, "Master file rec : % d\n", bitset- >masterFileInfo.masterRecNo); 
fprintf(stderr,"Program Name : %s\n",bitset->programInfo.programName); 
fprintf(stderr,"Number of Sites : %d\n",bitset->numVariationSites); 
fprintfCstderr," Number Selected : %d\n\ bitset- > totalSelected); 
20 fprintf(stderr," Actual Sizes : "); 

for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 

fprintf(stderr, " %d " .bitset- > actuallSizes[i]); 
fprintf(stderr,"\n"); 
fprintf(stderr," Alloc Sizes : "); 
25 for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 

fprintf(stderr, " %d " .bitset- > allocSizes[i]); 
fprintf(stderr,"\n"); 
fprintf(stderr,"Num Frags in X? : "); 

/* 

30 ** If the number of fragments is zero then we will write -1 to tell others 
** to calculate this themselves. 
*/ 

for ( i = 0 ; i < bitset- > numVariationSites ; i + + ) 
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fprintf(stderr,"%d ",(bitset->numFragsInEachSite[i] == 0 )?-l: 

bitset- > numFragsInEachSite[i]); 

fprintf(stderr,"\n"); 
fprintf(stderr, "Selections : \n"); 
5 indx = -1 ; 

do 
{ 

indx = IHBFindNextOne(bitset->bitset,indx+l); 
if ( indx == -1 ) 
10 break; 

indxl = indx / bitset- >allocSizes[l] ; 
indx2 = indx % bitset- >allocSizes[l] ; 
fprintf(stderr,"%d %d\n", indxl + 1 ,indx2 + 1 ); 
} while ( 1 ); 

15 } 

void CS_PRDCT_BITSET_GET_HITS( struct BitSetFileStruct '"bitset , int **indexes) 
{ 

int i ; 
int indx ; 
20 int indxl ; 
int indx2 ; 
int hitNo = 0 ; 

indx = -1 ; 

do 

25 { 

indx = IHBFindNextOne(bitset-> bitset, indx+1); 
if ( indx == -1 ) 
break; 

indxl = indx / bitset- > allocSizes[l] ; 
30 indx2 = indx % bitset- >allocSizes[l] ; 

indexes[0] [hitNo] = indxl + 1 ; 
indexes[l] [hitNo] = indx2 + 1 ; 
hitNo ++ ; 



567 



} while ( 1 ); 

} 



/* 

**+E: 
5 ** 



** Function Name : CS_PRDCT_BITSET_OPEN0 

** Purpose : Function will read in the header for a CS product bitset. 

10 ** 

** Usage : 
** 

** Returns : A handle to the product bitset info structure or NULL on 

** error. 

15 ** 

** Algorithms : None. 
** 

** Revision History : 
** 

20 ** Author Date Description 

## = = = = = = = = = = = = ==== = = = = = = 



** Fred Soltanshahi 07/26/96 Original version. 

** 

25 **-E: 
*/ 

void *CS_PRDCT_BITSET_OPEN( char *bitsetFileName , int offset ) 
{ 

struct BitSetFileStruct *bitset ; 
30 if ( ! (bitset = Read And Allocate(bitsetFileName,offset)) ) 

return (void *)NULL ; 
bitset- > totalSelected = IHBCountOnes(bitset- > bitset, 
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0, IHBBitSize(bitset->bitset)); 

/* 

** If the program did not keep track of and output this to the file then we 
** need to calculate it ourselves. 
5 */ 

if( ( bitset->numFragsInEachSite[0] ==0) |j ( bitset- > numFragsInEachSite[0] 

= = -1 )) 
{ 

CalculateFragsInSties(bitset) ; 

10 } 

return (void *)bitset ; 

} 

/* 

**+E: 

15 ** 
** 

** Function Name : CS_PRDCT_BITSET_CLOSE() 

** Purpose : Function will close a bitset file and cleanup allocated. 

20 ** memory. 
** 

** Usage : 
** 

** Returns : None. 

25 ** 

** Algorithms : None. 
** 

** Revision History : 
** 

30 ** Author Date Description 

## === = = = ==== = == = = = = = = = = ==== = = = = = = = = 
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** Fred Soltanshahi 07/26/96 Original version. 

** 

**-E: 
*/ 

5 void CS_PRDCT_BITSET_CLOSE( struct BitSetFileStruct *bitset ) 
{ 

DeallocateBitset(bitset) ; 

} 



/* 

10 **+E: 
*# 

** 



** Function Name : CS PRDCT BITSET WRITE() 



15 ** Purpose 



** Usage : 
** 

** Returns 
20 ** 

** Algorithms 



Function will write a bitset into the given file. 



1 on success or 0 on failure. 



None. 



Revision History 



25 ** Author 



Date 



Description 



** Fred Soltanshahi 



08/02/96 Original version. 



30 **-E: 
*/ 

int CS PRDCT BITSET WRITE(char *fileName,char *programName, struct 



# 
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BitSetFileStruct 'productBitset, int progBufferSize,int *progBuffer) 
{ 

if ( !WriteOutCompressedBSFile(fileName, 



10 



productBitset- > masterFilelnfo.masterFilePathName, 

productBitset- > masterFilelnfo.masterRecNo, 

programName, 
productBitset- > bitset, 
productBitset- > numVariationSites, 
productBitset- > actuallSizes, 
productBitset- > allocSizes, 
productBitset- > totalSelected, 
productBitset- > numFragsInEachSite, 
progBufferSize, 
progBuffer)) 

goto AddTraceback ; 
return 1 ; 
AddTraceback : 

fprintfCstderr.-CS.PRDCT.BITSET.WRITEO-Unable to write bitset file\n"); 
return 0 ; 

20 } 



15 



/* 

**+E: 
** 



25 ** Function Name : CS_PRDCT_BITSET_CREATE() 



** Purpose 
** master file 



Function will create an in-memory product bitset from a 



30 ** Usage 



** Returns 
** 
** 

** Algorithms : None. 

5 ** 

** Revision History : 
** 

** Author Date 



571 

: A handle to the product bitset info structure or NULL on 
error. 



Description 



10 = = = = = = = = = = 

** Fred Soltanshahi 
** 

**-E: 
*/ 



08/02/96 Original version. 



15 void * CS_PRD CT_BITS ET_CRE ATE(char *masterFileName, 

int masterRecNumber, 



int *initRawBitset) 



{ 



struct BitSetFileStruct *bitset ; 
20 if (! (bitset = Read And AllocateMaster(masterFileName, 

masterRecNumber, 



initRawBitset)) ) 



25 



else 



return (void *)NULL ; 



return (void *)bitset ; 



/* 

**+E: 
30 ** 



** Function Name : CS_PRDCT_BITSET_SETBITS() 
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10 



Purpose : Function will copy a raw bitset into the ChemSpace product 
bitset format. 



** 
** 
** 

** Usage 



: 1 on success or zero on failure. 



Date 



Description 



08/02/96 Original version. 



** Returns 
** 

** Algorithms : None. 
** 

** Revision History : 
** 

** Author 

** = = ==== = = = = = = ; 

15 ============ 

** Fred Soltanshahi 
*# 

*/ 

20 int CS_PRDCT_BITSET_SETBITS(void *bs, int *rawBS, int numProducts) 
{ 

struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 
void *compressed ; 
static int firstTime = 1 ; 
25 int i ; 
int total; 

char *cp = (char *)rawBS ; 
int rowLength ; 
int index 1 ; 
30 int index2 ; 
int byte ; 
int bit ; 

int totalSelected = 0 ; 
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if ( firstTime ) 
{ 

InitO; 

firstTime = 0 ; 

5 } 
/* 

** Just create a new one. 
*/ 

if ( bitset->bitset ) 
10 lHBDestroy(bitset->bitset ) ; 

if ( !(bitset->bitset = CreateCompressedBitSet(rawBS, 



0, 



bitset- > numVariationSites, 

15 

bitset- > actuallSizes, 

bitset- > allocS izes) ) ) 

goto UnableToCreateBitSet ; 
20 total = bitset- >actuallSizes[0] ; 

for ( i = 1 ; i < bitset- > numVariationSites ; i + + ) 

total *= bitset- > actuallSizes[i] ; 
if ( numProducts == -1 ) 

/* 

25 ** Calculate what products are being set. 
*/ 

{ 

numProducts = 0 ; 

rowLength = bitset- >actuallSizes[l] ; 
30 for ( i = 0 ; i < total ; i + + ) 

{ 



byte = ( i ) / 8 ; 



574 

bit = ( i ) % 8 ; 
if ( cp[byte] & setbits[bit] ) 
numProducts++ ; 

} 

5 } 

bitset- > totalSelected = numProducts ; 

return 1 ; 

UnableToCreateBitSet : 

fprintf(stderr,"CS_PRDCT_BITSET_SETBITS- Unable to set bitV); 

10 return 0 ; 

} 



/* 

**+E: 
** 

** 

** 
#* 

** Purpose : Function will copy a ChemSpace product bitset to a 
** raw bitset format. 



15 



20 



<* Function Name : CS_PRDCT_BITSET„TO_RAW () 



** Usage : calloc rawBS before call. useAlloc nonzero to use allocated 

** rather than actual dimensions 
** 

** Returns : 1 on success or zero on failure. 

25 ** 

** Algorithms : None. 
** 

** Revision History : 
** 

30 ** Author Date Description 



575 

** David Patterson 09/09/96 Original version. 

** 

**-E: 
*/ 

5 int CS_PRDCT_BITSET_TO_RAW (void *bs, int *rawBS, int useAlloc) 

{ CS PRDCT_BITSET_CONCAT_RAW(bs, rawBS, 0, useAlloc); 
return 1; 

} 

10 int CS_PRDCT_BITSET_CONCAT_RAW(void *bs, int *rawBS, int offset, 

int useAlloc) 

{ 

int *indxs = 0; 
int address, sum, b; 
15 struct BitSetFileStruct *bitset = (struct BitSetFileStruct *) bs; 

for ( address = -1 , b = 0 ; b < bitset- > totalSelected ; b++ ) 

address = IHBFindNextOne(bitset->bitset,address+l); 
BitSet AddressToIndexes(bitset,address , &indxs ,0) ; 

20 if (useAlloc) 

FlagProduct(rawBS, 0,0, address + offset); 

else /* must explicitly calculate the address */ 
(sum= cs _p RDC T_BITSET_INDEXES_TO_INDEX( bitset, indxs) ; 

FlagProduct(rawBS, 0,0, sum+offset); 

25 } 
} 

UTL_MEM_FREE(indxs) ; 
return 1; 

} 



30 /* 

**+E: 
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** 



Function Name : CS_PRDCT_BITSET_SELECTED 0 

: Function will return a ChemSpace bitset's totalSelected 



5 ** Purpose 



** Usage : 
** 

** Returns : integet count of selected bits in bitset 

10 ** 

** Algorithms : None. 



** Revision History : 
** 

15 ** Author 



** David Patterson 



Date 



Description 



09/24/96 Original version. 



20 **-E: 
*/ 

int CS_PRDCT_BITSET_SELECTED (void *bsvoid ) 
{ 

struct BitSetFileStruct *bs = (struct BitSetFileStruct *) bsvoid; 
25 return bs- > totalSelected; 

} 



/* 

**+E: 
** 



30 ** 

** Function Name : CS_PRDCT_BITSET_REVEAL () 



** Purpose 
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: Function will return a ChemSpace bitset's struct info to 
external calling program. 



** Usage : 

5 ** 

** Returns 
** 

** Algorithms : None 



1 on success or zero on failure. 



10 ** Revision History 
** 

** Author 



Date 



Description 



15 ** David Patterson 



09/10/96 Original version. 



** 

**-E: 
*/ 



20 



25 



30 



int CS_PRDCT_BITSET_REVEAL (void *bsvoid, 
char **MasterFile_Bitset, 
int *StartRecJBitset, 

int *BitsInAbsentia, 

int *BitsInAbsentiaNoCount, 

char **CoreFile, 

int *StartCore, 

char **FngrFile, 

char ***Xfiles, 

int **nY, 

FILE **FngrFile_File, 
int *FingerOff, 
char **ScreenFileName, 
int *BytesPerFingerPrint, 
int *WordsPerFingerprint, 
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int **query, 

int **FingerCore_FP, 

int *FingerCore_Card ) 

{ 

5 int i, size; 
int *fooi; 

struct BitSetFileStruct *bs = (struct BitSetFileStruct *) bsvoid; 
if (MasterFile_Bitset) 
*MasterFile_Bitset = bs->masterFileInfo.masterFilePathName ; 

10 if (StartRec_Bitset) 

*StartRec_Bitset =bs- > masterFilelnfo. masterRecNo ; 

if (BitsInAbsentia) 
*BitsInAbsentia = bs-> masterFilelnfo.numberOfMissingBits; 

if (BitsInAbsentiaNoCount) 
15 *BitsInAbsentiaNoCount = bs-> masterFilelnfo. lbits; 
if (CoreFile) 

*CoreFile = bs- > masterFilelnfo.corefilePathName; 

if (StartCore) 

*StartCore = bs-> masterFilelnfo. StartCore; 

20 if (FngrFile) 

*FngrFile = bs-> masterFilelnfo. fingerFileName; 

if (Xfiles) 

*Xfiles = bs-> masterFilelnfo. x_FileName; 

if (nY) 

25 *nY = bs->actuallSizes; 

if ( FngrFile_File) 

{ if (!((*FngrFile_File) = UTL_FILE_FOPEN((*FngrFile),"r"))) return 0; 

if (!UTL_FILE_FREAD(&i,sizeof(int),l,*FngrFile_File)) return 0; /* nbits in 

fp */ 

30 *BytesPerFingerPrint = ( i + 7 ) / 8 ; 

*WordsPerFingerprint = (*BytesPerFingerPrint + 3) / 4; 
(*query) = (int *) UTL_MEM_ALLOC( *BytesPerFingerPrint); 



579 

if (!UTL_FILE_FREAD(«&i,sizeof(int),l,*FngrFile_File)) return 0; /* record 



cnt */ 



if (!UTL_FILE_FREAD(&i,sizeof(int),l,*FngrFile_File)) return 0; /* record 

size */ 

5 rewind(*FngrFile_File); 

if (!(fooi = (int *) UTL_MEM_ALLOC( i ))) return 0; 

size = (3+i)/4 ; 

for ( i=0; i< = *FingerOff; i++) 

if ( ! UTL_FILE_FREAD( fooi, sizeof (int), size, *FngrFiIe_File)) 
10 return 0; 

/* if ( fooifl] ! = 2 + nY_01 * nY_02 ) return 0; */ 
if ( ScreenFileName ) 

{ 

if (!((*ScreenFileName) = UTL_STR_SAVE(fooi+4))) return 0 ; 

15 } 

if ( FingerCore_FP ) 
{ 

*FingerCore_FP = fooi; 

if (!UTL_FILE_FREAD( FingerCore_Card,sizeof(int),l, 

20 *FngrFile_File)) 

return 0; 

if ( ! UTL_FILE_FRE AD( *FingerCore_FP , 

sizeof(int), 

*WordsPerFingerprint, 

25 *FngrFile_File)) 

return 0; 

} 

} 

return 1; 

30 } 



/* 
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**+E: 



** 



** 
** 

5 ** 



Function Name : CS_PRDCT_BITSET_INDEXES_TO_INDEX0 
** Purpose : Function will return the right bit given a set of indices 

** Usage : all indexes are 0 based. 
** 

10 ** Returns : index to use in bitset. 
** 

** Algorithms : None. 
** 

** Revision History : extracted from CS_PRDCT_BITSET_SET_PRD_BIT by 

1 5 ** David Patterson 

** 

** Author Date 



Description 



20 ** Fred Soltanshahi 08/02/96 Original version. 

** 

**-E: 
*/ 



int CS PRDCT_BITSET_INDEXES_TO_INDEX( struct BitSetFileStruct *bitset, 



25 



int *indexes) 



{ 




int 


i ; 


int 


j ; 


int 


rowLength[MAX 


30 int 


indx = 0 ; 




for ( i = 0 ; i 




{ 



rowLength[i] = 1 ; 
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for ( j = i + 1 ; j < bitset->numVariationSites ; j + + ) 
rowLength[i] *= bitset- > actuallSizes[j] ; 

} 

for ( i = 0 ; i < bitset- > numVariationSites ; i++ ) 
{ 

indx + = indexesp] * rowLength[i] ; 

} 

return indx ; 



10 /* 

**+E: 

** Function Name : CS_PRDCT_BITSET_ALLOC_SIZE_INDEXES_TO_INDEX() 
15 ** 

** Purpose : Function will return the right bit given a set of indices 
it uses the allocated sizes in the bitset to get the info. 

20 int CS_PRDCT_BITSET_ALLOC_S.ZEJNDEXES.TO.INDEX( struct BitSetFileStruct 
*bitset, 

int ^indexes) 

{ 

int i ; 

25 int j ; 

int rowLength[MAX_VARIATION_SITES] ; 

int indx = 0 ; 

for ( i = 0 ; i < bitset- > numVariationSites ; i + + ) 

{ 

30 rowLength[i] = 1 ; 

for ( j = j + i ; j < bitset- > numVariationSites ; j + + ) 

rowLength[i] *= bitset- > allocSizes[j] ; 
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} 

for ( i = 0 ; i < bitset- > numVariationSites ; i + + ) 
{ 

indx + = indexes[i] * rowLength[i] ; 

5 } 

return indx ; 

} 

/* 

**+E: 

10 ** 
** 

** Function Name : CS_PRDCT_BITSET_SET_PRD_BIT() 

** Purpose : Function will set a product bit with the given indexes. 

15 ** 

** Usage : 
** 

** Returns : none. 

20 ** Algorithms : None. 
** 

** Revision History : 
** 

** Author Date Description 

25 **= = = = = = = = = = = = = = = = = = = = = = = = = = = 

** Fred Soltanshahi 08/02/96 Original version. 

30 */ 

int CS_PRDCT_BITSET_SET_PRD_BIT(void *bs, int ^indexes) 
{ 
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struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 
int indx = 0 ; 

indx = cS_PRDCT_BITSET_ALLOC_SIZE_INDEXES_TO.INDEX(b 1 tset, 

indexes); 

5 IHBSet(bitset- > bitset, indx ) ; 

bitset- >totalSelected+ + ; 

return 1 ; 

} 



/* 

10 **+E: 
** 

** 

** Function 
** 

15 ** Purpose 



Name : CS.PRDCT„BITSET^GET_RINFO0 

: Function will return the Reaction/Reagent info from 
the bitset file. 



** Usage : 
*# 

20 ** Returns : none. 
** 

** Algorithms : None. 
** 

** Revision History : 

25 ** 

** Author 



Date 



Description 



** Fred Soltanshahi 

30 ** 
**-E*. 



01/03/97 Original version. 



• r 
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int CS PRDCT_BITS ET_G ET_RINFO(void *bs, char **reactionInfo,char ***reagentInfo) 
{ 

struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 
Teactionlnfo = bitset->masterFileInfo.prefixForFiles ; 
5 *reagentInfo = bitset->masterFileInfo.reagentInfo ; 

return 1 ; 

} 



10 



15 



20 



/* 

**+E: 

** 
** 



** Function Name : CS PRDCT_BITS ET_G ET S TATS () 



** Purpose 
** 

*# 

** 

** 

** Usage : 
** Returns 



: Function will return the statistics for a bitset file, 
these will include numberOfSites, originalSizes, 
numberOfProducts and Number of fragments used at each 
variation site. 



none. 



** Algorithms : None. 



25 ** Revision History : 
** 

** Author 

** = = = = = = = = = 



Date 



Description 



30 ** Fred Soltanshahi 



08/05/96 Original version. 
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*/ 

int CS PRDCT_BITSET_GET_STATS(void *bs, int *numSites, int *numProducts, 

int **sizes, int **numUsed ) 

{ 

5 struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 
*numSites = bitset- > numVariationSites ; 
*numProducts = bitset- > totalSelected ; 

/* 

** Allocate buffers, if they have not been. 
10 */ 

if ( !(*sizes) ) 
{ 

if ( !((*sizes) = (int *)UTL_MEM_CALLOC(*numSites,sizeof(int)))) 
goto UnableToAllocateMemory ; 

15 } 

if ( !(*numUsed) ) 

{ 

if ( !((*numUsed) = (int *)UTL_MEM_CALLOC(*numSites,sizeof(int)))) 
goto UnableToAllocateMemory ; 

20 } 

memcpy(*sizes, bitset- > actualizes, sizeof(int) * *numSites ); 
memcpy(*numUsed, bitset- > numFragsInEachSite, sizeof(int) * *numSites ); 

return 1 ; 
UnableToAllocateMemory : 
25 fprintf(stderr, " CS_PRDCT_BITSET_GET_STATS() - Unable to allocate 

memory\n"); 

return 0 ; 

} 



/* 

30 **+E: 
** 
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** Function Name : CS_PRDCT_BITSET_CORE_INFO() 

** Purpose : Function will get the xfile and core and xrstring info 
5 ** from the bitset file. 

** 

** Usage : 
** 

** Returns : 1 on success or 0 on error. 
10 ** 

** Algorithms : None. 
** 

** Revision History : 
** 

15 ** Author Date Description 



** Fred Soltanshahi 08/09/96 Original version. 

*# 

20 **-E: 
*/ 

int CS_PRDCT_BITSET_CORE_INFO(void *bs, char **masterName, int *masterRecno, 

char **core, char **xrString, int *numSites, char ***xFileNames ) 

{ 

25 return ( ReadBitsetCoreInfo(bs,masterName,masterRecno ? 

core,xrString,numSites,xFileNames)); 

} 

/* 

30 ** 
** 

** Function Name : CS PRDCT BITSET PROG NAMEQ 
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** Purpose 
** 

** Usage : 
5 ** 

** Returns 
** 



Function will get the program name that produced this bitset. 



** Algorithms 



1 on success or 0 on error. 



None. 



10 ** Revision History 



** Author 

** = ==== = 



Date 



Description 



15 ** Fred Soltanshahi 08/09/96 Original version. 

*/ 

int CS PRDCT_BITSET_PROG_NAME(void *bs, char **programName) 
20 { 

*programName = ((struct BitSetFileStruct *)bs)->programInfo.programName 
return 1 ; 

} 

/* 

25 **+E: 
** 

** Function Name : CS_PRDCT_MSTR_CORE_INFO() 



30 ** Purpose : Function will get the xfile and core and xrstring info 
** from the master file. 

** 
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** Usage : 

** Returns : 1 on success or 0 on error. 
** 

5 ** Algorithms : None. 

** Revision History : 
** 

** Author Date Description 

10 **= = = = = = = = = = = = = = = = = = = 



** Fred Soltanshahi 08/09/96 Original version. 

** 

**-E: 
15 */ 

int CS_PRDCT_MSTR_COREJNFO(char *masterFile, int index, char **core, char 

**xrString, int *numSites, char ***xFileNames ) 

{ 

return ( ReadMasterCoreInfo(masterFile,index 
20 , core, xrS tring , numS ites , xFileNames)) ; 
} 



/* 
#* 

25 ** 

** Function Name : CS_PRDCT_BITSET_CREATE_BIT_STRING() 

** Purpose : Function will create a compressed version of a raw bit set. 
** It returns the memory size needed to hold the bitset. 

30 ** 

** Usage : 
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** Returns : pointer to a compressed bitset(this is not a ChemSpace 
** product bitset but just a compressed bitstring) 



5 ** Algorithms : None. 
** 

** Revision History : 
** 

** Author Date Description 

10 ** = = = = = = = = = = = = = = = = = = = 



** Fred Soltanshahi 08/06/96 Original version. 

15 */ 

void * CS_PRDCT_BITSET_C RE ATE_BIT_STRING ( int *rawBits, int offset, int 

numVariations, int *sizes, int *allocSizes, int *totalSize) 

{ 

void *compressed ; 
20 if ( !(compressed = CreateCompressedBitSet(rawBits, 



offset, 



numVariations, 



sizes, 



25 

allocSizes) ) ) 

goto UnableToCreateBitSet ; 

*totalSize = IHBRealSize(compressed); 

return compressed ; 

30 UnableToCreateBitSet : 

fprintf(stderr,"CS_PRDCT_BITSET_CREATE_BIT_STRINGO - Unable to create 

bitsetVn"); 

return ( void *)NULL ; 
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/* 

**+E: 



5 



* Function Name : CS_PRDCT_BITSET_DESTROY_BIT_STRING() 



** Purpose : Function will destroy the memory for a bitstring 
** allocate by the CREATE call above. 



10 



** Usage : 
** 

** Returns : none 
#* 

15 ** Algorithms : None. 
** 

** Revision History : 

** Author Date Description 



** Fred Soltanshahi 08/06/96 Original version. 



**-E: 
25 */ 

void CS_PRDCT_BITSET_DESTROY_BIT_STRING( void *bitset) 
{ 

IHBDestroy(bitset); 

} 



30 /* 

**+E: 



591 



Function Name : CS_PRDCT_BITSET_GETHITSQ 



** Purpose 
** 



: Function will return the indexes(into the original XI, X2 files 
for the requested number of hits. 



** Usage : 



** 



10 ** Returns : Number of hits found or -1 for error. 
## 

** Algorithms : None. 
** 

** Revision History : 



15 



** Author Date Description 



** Fred Soltanshahi 08/07/96 Original version. 



20 



*/ 

int CS_PRDCT_BITSET_GETHITS( void *bs, int offset, int numberOfHits, int 
***hitlndexes) 

25 { 

struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 

int numFound ; 
int numConnections ; 
static int *bitAddresses = (int *)NULL ; 
30 static int numBitAddresses = 0 ; 
int *indxs - (int *)NULL ; 
int start ; 
int count ; 
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int i ; 
int j ; 

(*hitlndexes) = (int **)NULL ; 
numConnections = bitset- > numVariationSites ; 

5 /* 

** Local housekeeping . 

*/ 

if ( numberOfHits > numBitAddresses ) 
{ 

10 if ( ! bit Addresses ) 

{ if ( !(bitAddresses = (int *)UTL_MEM_CALLOC(numberOfHits, 

sizeof(int))) ) 

goto UnableToAllocate ; 

15 } 

else 

{ 

if ( '.(bitAddresses = (int *)UTL_MEM_REALLOC(bitAddresses, 

20 numberOfHits* sizeof(int))) ) 

goto UnableToAllocate ; 

} 

numBitAddresses = numberOfHits ; 

} 

25 /* 

** Figure out if we have the number of hits he wanted and what their addresses 
** are in the bitset file. 

** We will have to come back and speed this up if it is to slow, but for now 
30 ** lets get it working. 
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*/ 

/* start = bitset->firstHitAddress ; */ 

start = -1 ; /* start from the begining */ 
for ( count = 0 ; count < offset ; count+ + ) 
5 { 

start = IHBFindNextOne(bitset->bitset,start+l); 

/* 

** Lets remember where the first hit is, this should save us some time later. 
*/ 

IQ if ( bitset- > firstHitAddress < = 0 ) 

bitset->firstHitAddress = start ; 

if ( start == -1 ) 

{ 

return 0 ; 

15 } 
} 

/* 

** Now lets see how many bits are set from here on. 
*/ 

20 for ( numFound = 0 ; numFound < numberOfffits ; numFound++ ) 

{ 

start = IHBFindNextOne(bitset- > bitset, start + 1); 

if ( start == -1 ) 
break; 

25 bitAddresses[numFound] = start ; 

} 

/* 

** Allocate the arrays. 
*/ 

30 if ( !(*hitlndexes = (int **)UTL_MEM_CALLOC(numConnections,sizeof(int *))) ) 

goto UnableToAllocate ; 
for ( i = 0 ; i < numConnections ; i++ ) 

{ 
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if ( !((*hitlndexes)[i] = (int *)UTL_MEM_C ALLOC(numFound , 



sizeof(int ))) ) 

goto UnableToAllocate ; 

5 } 
/* 

** Now translate each one of the bitset addresses to the variation site 

** indexes. 

*/ 

10 for ( i = 0 ; i < numFound ; i++ ) 

{ 

BitSetAddressToIndexes(bitset,bitAddresses[i],&indxs,0); 

for ( j = 0 ; j < numConnections ; j + + ) 

(*hitlndexes)[j][i] = indxs[j] + 1 ; /* Translate to 1 based indexes */ 

15 } 

if ( indxs ) 

UTL_MEM_FREE( indxs ); 

return numFound ; 
UnableToAllocate : 
20 AddTraceback : 

if ( indxs ) 

UTL_MEM_FREE( indxs ); 

return -1 ; 

} 



25 /* 

**+E: 
** 



** Function Name : CS_PRDCT_BITSET_GET_PARTIAL_HITS() 
30 ** 

** Purpose : Function will return the indexes(into the original XI, X2 files 
** for the requested number of hits. 
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** Usage : 



: Number of hits found or -1 for error. 



** Returns 

5 ** 

** Algorithms : None. 
** 

** Revision History : 
** 

Date Description 



10 ** Author 

*# ==== = : 



** Fred Soltanshahi 08/07/96 Original version. 



15 **-E: 
*/ 



in, CS PRDCT BITSET GET PARTIAL_HITS( vo.d «bs, tat •numP.oduc.s, in, site, in, 
numFix'edSUes, M .fixedSUesIndexes, in, .„umF,a g m e n,sPerSi,e, in, -hMndexes ) 



20 Luc BitSetFileStruct fitt* = (struct Bi,S e ,FileS,ruc, «)bs ; 

int total ; 

(*hitlndexes) = (int *)NULL ; 
GetPartialProductsStats( bitset , 



25 



30 



numFixedSites, 
fixedSitesIndexes, 

&total, 

numFragmentsPerSite); 



(*numProducts ) = GetPartialProductsAddresses(bitset, 

numFixedSites, 

fixedSitesIndexes, 

site, 

hitlndexes); 
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return 1 ; 



/* 

**+E: 



** Function 



Name : CS PRDCT_BITS ET_GET_PRDCT_P ARTI AL_HITS 0 



** Purpose : Function will return the indexes(into the original XI, X2 files 
10 ** for the requested number of hits. 



Usage : This works when the csln is actually being exploded. 

** Returns : Number of hits found or -1 for error. 

15 ** 

** Algorithms : None. 
*# 

** Revision History : 
## 

20 ** Author Date Description 



** Fred Soltanshahi 08/07/96 Original version. 

** 

25 **-E: 

* / 

int CS_PRDCT BITSET_GET_PRDCT_PARTIAL_HITS( void *bs, int *numProducts, int 
site, int'numFixedSites, int *fixedSitesIndexes, int *numFragmentsPerSite, int **hitlndexes 

) 

30 { 

struct BitSetFileStruct *bitset = (struct BitSetFileStruct *)bs ; 
int total ; 



10 
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(*hitlndexes) = (int *)NULL ; 
GetPartialProductsStats( bitset , 

numFixedSites, 
fixedSitesIndexes, 
&total, 

numFragmentsPerSite); 

(*numProducts ) = GetPartialProductsAddresses(bitset, 

numFixedSites, 
fixedSitesIndexes, 

site, 

hitlndexes); 

(*numProducts ) = total ; 
return 1 ; 

} 

15 AblTac.: For Chemspace btatf file cal, callback with products choices no. selected. 

Input: 

1. This function takes a BitSetFileS tract returned most likely from: 
CS_PRDCT_BITSET_OPEN(char "filename) 
20 2. A void pointer'which is passed to callback function. This is for 
whatever you want. 

3 A pointer to function returning: 

int (void *udata, int numVariants, int "choices ). 

choices is of size numVariants, the choices are zero based, and 
25 choicest is the choice for markush Y_01 , choice[l] for Y_02 etc. 

NOTE l: numVariants of -1 and a null for choices is passed to sonify 
the end of the choices excluded, just in case the function 
want to do some special processing at the end. 
NOTE 2: The return value from the callback function is ignored. 

30 Returns: 

Total number of bits excluded. 
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-1 upon error. 
** Author Date Description 



5 -RobJilek 07/26/96 Original version. 



int CS PRDCT BITSET_ZERO(struct BitSetFileStruct *bitset, void *udata, 
int (*ZeroProducts)(void *udata, int numVariants, int •choices ) ) 

{ 

10 BIT_TRACK1NG bt[l]; 

if ( bitset->numVariationSites < = 0 ) 

return -1; 

bt->numVariations = bitset- > numVariationSites; 
bt->bitset = bitset; 
15 bt- > call_udata = udata; 

bt->funcptr = ZeroProducts; 

bt->choices = (int *) UTL_MEM_CALLOC(bt->numVariations, S1 zeof(mt) ); 

bt- > totalExcluded = 0; 

/* The sequence is as follows: 
2Q IHBRange has a loop to find zeros/ones. 

It calls RangeCallback 

RangeCallback calls ZeroProducts callback. 

while ( not end of list ) 

call RangeCallback with start and end Range. 

for ( i = startRange; i < = EndRange; i++ ) // 



25 

RangeCallback 



ZeroCallback 

30 



calculate product array. 

call ZeroProducts 11 



IHBRange(bitset-> bitset, 0, (void *) bt, RangeCallback ); 
UTL_MEM_FREE((char *) bt-> choices ); 
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return bt->totalExcluded; 



} 



Synopsis: Gets called for each range of bits set. It then 
5 converts each bit to a product array and calls callback for each. 

sltic int RangeCallback ( void *udata, int startRange, int endRange ) 

{ BIT_TRACKING *bt = (BIT_TRACKING *) udata; 

10 int indx; 

int oor; 
int skip; 

void *call_udata; 
int numVar; 
15 int *choices; 

void *bitset; 

call_udata = bt- > call_udata; 
numVar = bt- > numVariations; 
choices = bt-> choices; 
20 bitset = bt->bitset; 

for ( indx = startRange; indx < = endRange; ) 

{ skip = BitSetAddressToIndexes(bitset,indx,&choices,&oor); 
if ( !oor ) 
25 { 

(*bt->funcptr)(call_udata, numVar, choices ); 
bt- > totalExcluded + + ; 
indx + + ; 

} 

30 else 
{ 

if ( skip > 0 ) 

indx += skip; 
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else 

indx+ + ; 



^ 1 /• » *\ n v /* Signify end of zeros. */ 

(*bt->funcptr)(call_udata,-l, (int *) 0 ), ' J 



return 0; 



AbUt: For Chemspace bitset file call callback with products choices selected. 
10 Input: 

1. This function takes a BitSetFileStruct returned most likely from: 

CS PRDCT_BITSET_OPEN(char "filename) 

2. A void pointer which is passed to callback function. This is for 

whatever you want. 

15 3. A pointer to function returning: 

int (void *udata, int numVariants, int "choices ). 

choices is of size numVariants, the choices are zero based, and 
choicest is the choice for markush Y_01, choice[l] for Y_02 etc. 
NOTE 1: numVariants of -1 and a null for choices is passed to signify 
20 the end of the choices excluded, just in case the function 

want to do some special processing at the end. 
NOTE 2: The return value from the callback function is ignored. 

Returns: 

Total number of bits included. 

25 -1 upon error. 

See Also: CS_PRDCT_BITSET_ZERO 

** Author Date Description 



30 -Rob Jilek 08/19/96 Original version. 



* 



/ 

int CS 



PRDCT BITSET_ONE(struct BitSetFileStruct *bitset, void *udata, 
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int (*OneProducts)(void *udata, int numVariants, int ^choices ) ) 
{ 

BIT_TRACKING bt[l]; 
if ( bitset- > numVariationSites < = 0 ) 
5 return -1; 

bt->numVariations = bitset- > numVariationSites; 

bt-> bitset = bitset; 
bt- > call_udata = udata; 
bt->funcptr = OneProducts; 
10 bt- > choices = (int *) UTL_MEM_C ALLOC (bt- > numVariations, sizeof(int) ); 

bt- > totalExcluded = 0; 

IHBRange(bitset-> bitset, 1, (void *) bt, RangeCallback ); 
UTL_MEM_FREE((char *) bt-> choices ); 
return bt-> totalExcluded; 

15 } 



#if 0 

main(argc,argv) 
int argc ; 
char *axgvU '■> 
20 { 

void *h ; 

char *masterFileName = 

•7home7/fred/work/ADS/dserv/source/dbcsln_des/TestData/Di_300j 

int masterRecNumber = 1 ; 
25 int *bitset ; 

int size = ( 300 * 400 + 7 ) / 8 ; 
int i ; 
int j ; 

int indexes[2] ; 
30 char hold[81]; 
m 1 

if ( !(h = CS_PRDCT_BITSET_OPEN(argv[l],0))) 
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fprintf(stderr, "Unable to open the bitset file %s\n",argv[l]); 
exit ; 

} 

5 cS_PRDCT_BITSET_DUMP(h); 

CS_PRDCT_BITSET_CLOSE(h); 

#else 

if ( !( h = 

CS P RDCT_BITSET_CREATE(masterFileName,masterRecNumber,NULL) ) ) 

fprintf(stderr,"Unable to create bitset for %s\n",masterFileName); 
exit ; 

CS_PRDCT_BITSET_WRITE("Test.bs" ."MyProg" ,h,0,NULL); 

15 indexes[0] = 59 ; 

indexes[l] = 129 ; 
CS_PRDCT_BITSET_SET_PRD_BIT(h .indexes) ; 

indexes[0] = 159 ; 
indexes[l] = 241 ; 
20 CS_PRDCT_BITSET_SET_PRD_BIT(h , indexes) ; 

CS_PRDCT_BITSET_WRITE("Test2.bs" , "MyProg" ,h,0,NULL); 
bitset = (int *)UTL_MEM_CALLOC(size,sizeof(int)); 

bitset[5] = 49 ; 
bitset[l] = 99 ; 
25 CS_PRDCT_BITSET_SETBITS(h,bitset,-l); 

CS_PRDCT_BITSET_WRITE('Testl.bs","MyPro g ",h,0,NULL); 

CS_PRDCT_BITSET_CLOSE(h) ; 

#endif 
} 

30 #endif 
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Ap pendix "S" 



*/ 



/* topsim 
5 



*/ 

/*+C 



* 



* This program determines which csln "products" are similar to an input 
10 * structure, where similarity occurs if the sum of differences in encoded 

* "CoMFA" fields is less than some threshold. 
* 

* The csln components are referenced in a master file with 

* one multiline record per cSLN. Record format is 

15 * Reaction class xxxx (where "Reaction class" is a literal) 

* reaction_name 

* number_of_sv__sites 

* missing_bits_count 

* hashed_only_missing_bits_count 

20 * core_filename 

* core_filename_index_of_core 

* fingerprint_filename 

* offset JntoJingerprint_file 

* first_sv_file_Xl 

25 * secod_sv Jile_X2 (etc if more than two sv_sites) 

* NOTE - ALL subsequent entries in the master file whose Reaction class 

* matches the Reaction class of the record referenced by -index are also 

* processed! ("Matching" implies matching of possible other input symbols 
30 * to components of the Reaction class line.) 

* The input structure is read as encoded fields from stdin (or 

* a named file if provided), one field per line. There 
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* must be provided (by a SYBYL SPL script), in order: 

* ,, numbcr_of.sv - sites" * "number_of_field_types" fields describing the "core" of the 
query 

* ,, number_of - sv_sites" - 1 sextets of relative coordinates of core attachment atoms 

* "number_of_sv_sites" * "number_ofJieldJypes" fields describing the "side chains" 



10 



15 



Options: 



-master name 
-bitset name 



-index number 



- name is the file with master file records 

- name is a result of an earlier search operation 
(use EITHER master or bitset) 

- which sequential record in master file to begin at 
OR offset into bitset in a bitset file 

(default = 1) 



20 



* 



-reaction name 



-details name 



- records in master file to be processed must have this 
class name 

- if provided, records in master file to be processed 

must have any one of these tokens following its class name 



25 



-distance tan 



-cooweight cwt 



- tan is the overall similarity threshold 
(default is 90.0) 

- weight of the core attachment coordinates, 
relative to fields 



30 



-nocore nocore 



- do not consider core topomer differences 
By default these are considered (required) 



-allcores allc - process all cores in the core file 



5 
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By default only one core (index in the master file) is processed 



* -maxhits max - stop when max hits are found (default infinity) 
-input filename - name of file with queries (default stdin) 
-output filename - specifies the output file for the hit info 



* _ # This flag forces the display of all 

10 * options 



***************** 



**********************-***-***---*****^********* !,! **** 



/ 

15 #include <stdio.h> 
#include < signal. h> 
#include <ctype.h> 
#include <unistd.h> 
^include <string.h> 
20 #include <sys/stat.h> 
#include <math.h> 
#include "parseopt.h" 
#include "utl_str.h" 
include "utl_mem.h" 
25 #include M utl_file.h" 
#include "utl_math.h" 
include "ct.h" 
#include "ct_expr.h" 
#include "ct_proto.h" 
30 #define GoodExit 0 
#define ErrorExit 1 

#define Visual(s) { ^ rintf s ' > 



static FILE 
static char 
static char 
static int 
5 static char 
static char 
void 
static int 
static FILE 
10 static int 
static char 
static FILE 
static char 
static char 
15 static int 
static char 
static char 
static double 
static double 
20 static char 
static int 
static char 
static double 
static char 

25 

static char 
static char 
static FILE 
static char 
30 static char 
static char 
static double 
static double 
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*OutputFile = 0; 
*OutputFileBase; 
OutputFileName[200] ; 

nOutFiles = 0; /* number of output files */ 
*MasterFile = 0; 
*BitsetFile = 0; 
*bitset; 

MasterRecord = 1; 

*MasterFile_File; 
StartCore; 
*InputSource = 0; 
*InputSourceFile; 
*ReactionNeeded = 0; 
*ScratchDetails = 0; 

nDetail = 0; 
**ReactionDetails = 0; 

*XWeights = 0; 
*RWeights = 0; 
CoreWeight; 
*FieldTypes = 0; 
nFType = 0; 
*FTypes = 0; 
*FWeights = 0; 

**FOrder = 0; /* temp, for recording L~>R order of data 

in side chain SLN */ 
**FROrder = 0; 
*Corefile = 0; 
*CoreFile_File; 
*CoreNow; 
**Xfile; 
**Xname; 
Distance = 90.0; 
CoreDistance = 0.0; 



static double 
static double 
static double 
static double 
5 static double 

static double 

static int 

static int 

static int 
10 static int 

static int 

static int 

static int 

static unsigned char 
15 static unsigned char 
static unsigned char 
static unsigned char 
static double 
static int 

20 

static int 
static int 
static int 
25 static int 
static int 

static int 

static int 

static int 
30 static int 

static int 

static int 

static int 
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DWeight = 1.0; 
Dist[16][16]; 
boundary[16]; 
CXcoords[6], CXdiffsq[6]; 
searched = 0.0; /* number searched */ 
combi = 1.0; /* number of side chain combos */ 
totnout = 0, nout = 0; /* number of products */ 
*Good_Products = 0; /* product bit set */ 
*Dead_Products = 0; /* forbidden product bit set */ 
nR; /* number of R positions (usually 2) */ 
*nX; /* number of product dimensions */ 

*Xct; /* used for indexing over all products */ 
**Xsize; /* bytes per field */ 

****X = 0; /* csln field (F x R x nX )*/ 
***Xin; /* target fields */ 
***Y; /* csln core fields (F x R ) */ 
***Yin; /* target core fields */ 
***X2Y; /* distances between X and X' */ 
nSym, /* number of symmetries in this core */ 

*CoreSyms, /* flags for all matching core symmetries 
**SymList; /* symmetry mappings */ 
DefaultSym[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; 
ReverseSym[2] = {1, 0}; 
AppendToOutputFile = 0; 
NoMorehitsPlease = 0; 
User Aborted; 
NoCore = 0; 
AllCores = 0; 
CoreOK = 0; 
CorelsSame = 0; 
SideChainOnly = 0; 
SideChainsAreSame = 0; 
NotBitOutput = 0; 
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static char comline[2048]; 
static struct ParseOptions Options[] = { 

{"master", ParseOptString, &MasterFile, 
"Prefix for all input Files" }, 
5 {"bitset", ParseOptString, &BitsetFile, 

"Name is the file with bitset records" }, 
{"distance", ParseOptDouble, &Distance, 
"Field similarity threshold (default 90.0)" }, 
{"cooweight" , ParseOptDouble, &DWeight, 
1Q "Core coord wt, relative to fields (default )" }, 

{"index", ParseOptlnt, &MasterRecord, 

"Which MasterRecord entry 1-n" }, 
{"maxhits", ParseOptlnt, &NoMorehitsPlease, 
"Maximum number of hits before stopping" }, 
15 {"nocore", ParseOptlnt, &NoCore, 

"Use -nocore to override inclusion of the core differences" }, 
{"allcores", ParseOptlnt, &AllCores, 

"Use -allcores to search all cores provided" }, 
{"input", ParseOptString, &InputSource, 
2 0 "File from which queries will be read( default stdin). "}, 

{"output" , ParseOptString, &OutputFileBase, 

"File to which hit info will be written. "}, 
{"notbits" , ParseOptlnt, &NotBitOutput, 

"Use notbits to output as index ASCII instead of std bitset." }, 
25 {"reaction", ParseOptString, &ReactionNeeded, 

"Reaction class for topomer search. "}, 
{"details" , ParseOptString, &ScratchDetails, 

"Details further discriminating the reaction class. "}, 
{"sidechain", ParseOptlnt, &SideChainOnly, 
30 "Use sidechain to search for s.rmliarity in a single sidechain only. "}, 

{"fieldtypes" , ParseOptString, &FieldTypes, 

"Names of all field types (optional prefix -weight), space separated. Does 

CTOPS if none provided. " } , 



# 
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{"xweights", ParseOptString, &XWeights, 

"Weights of varying sites. Must be nR(+core?) individual weights present (if 

any)."}, 

5 L UBS_OUTPUT_MESSAGE 0 { return 0; } /* just for compiling OK */ 
int TJTMS2_WRITE_PHOTO0 { return 0; } 

int lowercase (s) char *s; {while (*s) { if isupper(*s) *s = tolower(*s); s++;» 
static int ParseArguments( argc, argv ) 

10 * 

* This function parses the command line arguments. 

* 

* Returns: 1 on a successful command line parse, 0 otherwise. 

15 * Warnings: 



20 



Description 



* Errors: 
* 

* Author Date 

* ====== = = = = = = = = 

* G. B. Smith 02-09-93 Original Version 

* 

*/ 

int argc; 
25 char **argv; 

{ 

int nargs, 

noptions - sizeof( Options )/sizeof(Options[0]); 
nargs = UTL_PARSE_OPT( argc, argv, noptions, Options ); 
30 if( '.nargs ) goto SyntaxError; 

return 1; 
SyntaxError: 

fprintf( stderr, "Bad command line argument(s)\n" ); 
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return 0; 

} 

static int OpenOutputFileO 
/*+I 
5 * 

Returns: 1 on sucesss, else 0 



*/ 



10 char *msg; 

FILE *fp; 
OutputFile = stdout; 
if( OutputFileBase) 

{ 

15 MakeOutputFileName(); 

« We need to create output files under the ownership of the REAL user not the 

** EFFECTIVE user. This only applies if setuid options are activated. 

*/ 

20 { 

struct stat statBuff ; 

int uid ; 
int euid ; 

uid = getuidO ; 
25 euid = geteuid(); 

stat(OutputFileName, &statBuff); 

/* 

** There are two cases 

** (1) the file to output to exists 
30 - use the ownership of the current owner of the file or if you cant do that 

** do not do anything. 

(2) The file is being created. 

use the ownership of the REAL user. 



** 
** 



611 



*/ 



if ( access(OutputFileName, F_OK) = - 0 ) 
{ /* If the file exist and the real user is the owner of the file */ 
if ( statBuff.st_uid == uid ) 
5 seteuid(uid); 

} 

else 

{ /* Create the file as the REAL user */ 
seteuid(uid); 

10 } 
} 

OutputFile = fopen( OutputFileName, (AppendToOutputFile?"a H :-wb")); 

if( ! OutputFile ) { 

fprintf(stderr, "Error: Failed to open output file \"%s\"\n", 

15 OutputFileName ); 

goto ErrorReturn; 

} 

} 

return 1; 
20 ErrorReturn: 

return 0; 

} 

static int WhatsTheDifferenceO 

/* builds distance lookup table and initializes default symmetry data structure */ 
25 { 

int i, j; 

#define pow2(a) ( (a) * (a) ) 

/* the assignment of codes is based on the following (from gen_pls.c): 
static fpt cutoffll6] = {9999., 0., 2., 4., 6., 8., 10., 12., 
30 14., 16., 18., 20., 22., 24., 26., 30. }; 

*/ 

boundary[0] = 9999.; /* missing data ought never to occur. */ 
boundary[l] = -0.1 ; 
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for (i=2;i< 15;i++) 
boundary [i] = 2*i-3; 
boundary[15] = 30.0; /* this is a steep curve with a cutoff at 30! */ 
for (i=0;i<l6;i++) for (j=0;j< 16;j + +) 
5 Dist[i][j] = pow2( boundary[i] - boundary[j]); 

Distance — Distance; /* want to test D*2 directly */ 
DWeight *= DWeight; 

/* allocate once for all conceivable symmetry reorderings */ 
if (KSymList = (int **) UTL_MEM_ALLOC( sizeof( int *) * nR * (nR - 1) / 2) )) 

10 return 0; 

if (KCoreSyms = (in. •) UTL_MEM_ALLOC( sizeof( in. ) * nR * (nR - 2) )) 

return 0; 
SymList[ 0 ] = DefaultSym; 
SymList[ 1 ] = ReverseSym; 
15 return 1; 
} 

static int ReadAField( hex, index, pXP ) 

/* converts field from external (ASCii hex) format to internal */ 

char *hex; 
20 int *index; 

unsigned char **pXP; 

{ 

int words, hold; 
char next2[10], *nxhx; 
25 words = strlen( hex ) / 2; /* assuming 8-bit bytes */ 
if (! *index ) *index = words; 
if ( words ! = *index ) { 
/* bad field (most likely NULL), continue anyway */ 
*pXP = (unsigned char *) NULL; 
30 return 1; 

if (!(* P XP = (unsigned char *) UTL_MEM_ALLOC(words) )) return 0; 
for (words=0, nxhx = hex; words< *index ; words++) { 
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memcpy(next2, nxhx, 2); 
nxhx +=2; 

sscanf( next2, "%2x", &hold ); 

*(*pXP + words) = (unsigned char) hold; 

5 } 

return 1; 

} 

static int RetrievelnputO { 

/* reads the search pattern fields (generated by SYBYL script) */ 
10 int index, R, F; 
char *line; 
double atofO; 

if ('.InputSource) InputSourceFile = stdin; 
else if ('.(InputSourceFile = f 0 pen( InputSource, V ) )) { 
15 fprintf( stdout, "Could not open -input file %s\n", InputSource ); 

return 0; 

if (.(Yin = (unsigned char ***) UTL_MEM_ ALLOC ( sizeof( unsigned char **) * nFType 
))) 

20 return 0; 

for (F = 0; F < nFType; F + +) { 
if (!(Yin[ F ] = (unsigned char **) UTL_MEM_ALLOC( sizeof( unsigned char *) * nR 

))) 

return 0; 

25 memset( Yin[F], 0, sizeof( unsigned char *) * nR ); 
} 

if (INoCore) { 

/* field types are paired closest! */ 

for (index = 0; index < nR; index* +) for (F = 0; F < nFType; F + +) { 
30 /* a Field is on a single line, no parsing needed */ 

if (-1 == UTL_SCAN_GETS( InputSourceFile, "W\ Aline)) 

return 0; 

if (!ReadAField( line, Xsi Z e[ F ] + index, Yin[ F ] + index )) return 0; 
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} 

for (index = 0; index < 6; index + +) { 

if (-1 == UTL_SCAN_GETS( InputSourceFile, "\V\ "ft", &line)) 

return 0; 

5 CXcoords[ index ] = atof( line ); 

If (!(Xin = (unsigned char ***) UTL_MEM_ALLOC( sizeof( unsigned char **) * nFType 
))) 

return 0; 

10 for (F = 0; F < nFType; F++) { 

if (!(Xin[F] = (unsigned char **) UTL_MEM_ ALLOC ( sizeof( unsigned char *) * nR 

))) 

return 0; 

memset( Xin[F], 0, sizeof( unsigned char *) * nR ); 

15 } 

for (index = 0; index < nR; index++) for (F = 0; F < nFType; F+ + ) { 
/* a Field is on a single line, no parsing needed */ 

if ( .l = = UTL_SCAN_GETS( InputSourceFile, "\\", T\ &line)) 

return 0; 

20 if (!ReadAField( line, Xsize[ F ] + index, Xin[ F ] + index )) return 0; 

} 

} 

fclose( InputSourceFile ); 

return 1; 

25 } 

static int InitCoreO { 

/* readies core file and its input arrays */ 
int R, i, F; 
char *foo; 

30 

if (! (CoreFile_File = fopen(Corefile,"r"))) { 

fprintf( stderr, "%s Core file not foundAn", Corefile ); 
return 0; 
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} 

i=0; 

while ( i < StartCore ) 
{ 

5 if ( .1 == UTL_SCANJ 
if (AUCores) break; 

i++; 



GETS( CoreFile_File, "W, "#", return 0; 



} 

CoreNow = UTL_STR_SAVE( foo ); 
10 /* initialize core data structures */ ^ 
if 0 pr - (unsigned char «•> UTL_MEM_ALLOC( sizeof( unsigned char ) 

nFType)) ) 

return 0; 

for (F = 0; F < nFType; F+ +) { n RV> ^ 

- A u *^ titt MEM ALLOC( sizeof( unsigned char *) nR)) ) 
15 if (!(Y[F] = (unsigned char **) UTL_MbM_ALbu v 

return 0; 

for (R = 0; R < nR; R++) . 

a w *\ ttti MEM ALLOC( sizeof( unsigned 
if (!( *( (Y[F]) + R ) = (unsigned char *) UTL_MbM_ALL v 

char ) 

20 * (*Xsize[F ]) + R ) )) return 0; 

} 

return 1; 

} 

int CountLinesO 
25 { 

int i; 

char *foo; 

/* note that CountLines returns one less than the actual number */ 

30 'whUe ( -1 !- UTL_SCANGETS( InputSourceFile, "W, "#", Moo)) i + + ; 
rewind(InputSourceFile); 
return i; 



5 
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} 

static int initXarrays 0 
{ 

int F, i; 

if ('(Xfile = (char **) UTL_MEM_ALLOC( sizeof( char* ) * nR ))) return 0; 
if (!(Xname = (char **) UTL_MEM_ALLOC ( sizeof( char* ) * nR ))) return 0; 
if (!(nX = (int*) UTL_MEM_ALLOC( sizeof( int ) * nR ))) return 0; 
if (<(Xct = (int*) UTL_MEM_ALLOC( sizeof( int ) * nR ))) return 0; 
for (i = 0; i < nR; i+"+) { Xfile[i] = 0; Xname[i] = 0; nX[i] = 0; Xct[i] = 0; } 
10 if (!(X = (unsigned char ****) UTL_MEM_ALLOC( sizeof( unsigned char ***) * 

nFType)) ) 

return 0; 

for (F = 0; F < nFType; F + +) { 

if (!(X[F] = (unsigned char ***) UTL_MEM_ALLOC( sizeof( unsigned char **) 

15 * nR)) ) 

return 0; 

memset( X[F], 0, sizeof( unsigned char **) * nR ); 

!f (!(Xsize = (int **) UTL_MEM_ALLOC ( sizeof( int * ) * nFType ))) return 0; 
for (F = 0; F < nFType; F + +) { 

if (!(Xsize[F] = (int *) UTL_MEM_ ALLOC ( sizeof( int ) * nR ))) return 0; 
for (i = 0; i < nR; i + +) *(Xsize[F] + i) = 0; 

} 

return I; 

25 } 

static int initXfiles( i, SideChainsAreSame ) 

/* reads X file data (reactant descriptors from 2nd comment line of X file ) */ 

int i, *SideChainsAreSame; 

{ 

30 char *foo, *pch; 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "\\", &foo)) return 0; 

if (Xfile[i]) { 
/* if this X file is same as last, nothing to do */ 
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if (!strcmp( Xfile[ i ], foo ) ) return 1; 
*SideChainsAreSame = FALSE; 
UTL MEM_FREE( Xfile[i] ); 



} 



5 Xfile[ i ] = UTL_STR_SAVE(foo); 

if (! (InputSourceFile = fopen(Xfile[i],"r"))) { 

fprintf( stdout, "Could not open variation file %s\n", Xfile[i] ); 
return 0; 

} 

10 /* reading COMMENT lines to get USER_NAME value for matching */ 

if ( -1 == UTL_SCAN_GETS( InputSourceFile, "W\ "", &&*>)) return 0; 

if ( -1 = = UTL_SCAN_GETS( InputSourceFile, "\V\ "", &fbo)) return 0; 

if (Xname[i]) UTL_MEM_FREE( Xname[i] ); 

Xname[i] = 0; 
15 pch = strstr( foo, "USER_NAME=" ); 

pch += strlen( " USER_N AME = " ); 

if (!(Xname[i] = UTL_STR_SAVE( pch ) )) return 0; 

fclose( InputSourceFile ); 

return 1; 

20 } 

int StartFromBitset() 

{ 

void *CS_PRDCT_BITSET_OPEN(); 

if ( '( bitset = CS_PRDCT_BITSET_OPEN( BitsetFile, MasterRecord))) return 0; 



25 



30 



if ( !RetrieveMasterFileFromBitset(bitset, 

&MasterFile, 

&MasterRecord, /*in master file*/ 

0, 
0, 
0, 
0, 
0, 
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0, 

0, 

0, 

0, 

0, 

0, 

0, 

0, 

0, 

0, 

0, 

0 ) ) return 0; 



return 1; 



} 



15 /* 1/7/97 DEP: allow reading of bitsets. Since the masterfile must be 

read in any case, the bitset only generates "Dead.Products" */ 
int InitMasterFile() 

/* Read the master file record which is requested; 

failure if it does not match the input line info */ 

20 { 

int i, d, size, rxMatch, irx, ns, *Sym; 
char *foo; 
int *fooi; 

if (BitsetFile && ! StartFromBitset()) return 0; 
25 if (! (MasterFile_File = fopen(MasterFile,V'))) { 

fprintf( stdout, "%s (master file) not foundAn", MasterFile ); 
return 0; 

} 

rxMatch = irx = 0; 
30 while ( ! rxMatch) { 

if ( -1 — UTL_SCAN_GETS( MasterFile_File, "W, &foo)) return 0; 

if ( strstr(foo," Reaction class ")) { 
irx + + ; 
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if (bitset && irx > MasterRecord) return 0; /* the right record did not match */ 
/* preliminary match if (1) Reaction Needed matches and (2) 

NO_core must be present if NoCore is TRUE (or vice versa) */ 
rxMatch = ( irx > = MasterRecord && strstr( foo, ReactionNeeded ) 
5 && ((INoCore && !strstr( foo, "NO_core" ) ) 

| j ( NoCore && strstr( foo, "NO_core" ) ) ) ); 

} 

/* if preliminary match, check rest of .mf record -- first # reactants */ 
if (rxMatch) { 

10 /* skip name, record / compare number of reagents */ 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "\\", &foo)) return 0; 
if ( .1 = = UTL_SCAN_GETS( MasterFile_File, "\\", &foo)) return 0; 
if ( ! UTL_STR_ATOI(foo, &d) ) return 0; 

if (!nR) { 

15 if (SideChainOnly && d ! = 1) { 

fprintf( stdout, "Side Chain only but .mf file references more than 

one side chain. \n" ); 

return 0; 

} 

20 n R = d > 

if (!initXarrays()) return 0; 

} 

rxMatch = nR == d; 

} 

25 if (rxMatch) { 

/* skip fgpt stuff, record core and side chain file stuff */ 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "W", &foo)) return 0; 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "\V , &foo)) return 0; 

if ( .1 == UTL_SCAN_GETS( MasterFile_File, "\\", &foo)) return 0; 
30 if (Corefile) UTL_MEM_FREE( Corefile ); 

Corefile = UTL_STR_SAVE(foo); 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "W", &foo)) return 0; 
if ( ! UTL_STR_ATOI(foo, &StartCore ) ) r eturn °5 



620 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "W, T\ &ft»)) return 0; 
if ( -1 « UTL_SCAN_GETS( MasterFile_File, "\V\ &fbo)) return 0; 
for (i = 0; i < nR; i++) if (!initXfiles( i, &SideChainsAreSame ) ) return 0; 

} 

5 } /* read .mf file until we have a matching reaction */ 
return 1; 

} 

static int ReadXsO { 

/* reads all topmer fields from all current Xn files */ 
10 int R, F, i, n, ns, realloc, Fd; 
char *CTOPS, *line, *rptr; 
double *dp, **sdptr; 
unsigned char **uc; 
combi = 1.0; 

15 /* skip the following lengthy stuff if side chains are all the same */ 
if (SideChainsAreSame && X[0]) return 1; 
for (R = 0; R < nR; R++) { 
if (! (InputSourceFile = fopen(Xfile[R],"r"))) return 0; 

n = CounfLinesO; 
20 realloc = n ! = nX[ R ]; 
combi * = (double) n; 
if (realloc && nX[ R ] ) 

for (F = 0; F < nFType; F++) { 

for (i = 0; i < nX[R]; i++) UTL_MEM_FREE( *(X[F] + R) + i )", 
25 UTL_MEM_FREE( X[F] + R ); 

} 



nX[ R ] = n; 

if (realloc) for (F = 0; F < nFType; F++) 
if 0(*(X[F] + R) = (unsigned char **) 

UTL_MEM_ ALLOC ( sizeof( unsigned char *) * nX[R]) )) 

/* starts reading at line 2! */ 

for(i = 0; i < nX[R]; i++) { 
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if ( _ t = as UTL_SCAN_GETS( InputSourceFile, "W, &line)) 
goto error; 
/* generate info for left-to-right read */ 

for (F = 0; F < nFType; F++) FOrder[ F ] = strstr( line, FTypes[ F ] ); 

5 do { 

for (Fd = -1, F = 0, fptr = 0; F < nFType; F++) 

if (FOrder[F] && (!fptr 1 1 FOrder[F] < fptr)) {fptr = FOrder[F]; Fd - 

F;} 

if (fptr) { 

10 fptr + = strlen( FTypes[ Fd ] ) + 1; /'skipping "CTOPS = " */ 

UTL_SCAN_TOKENlZE(fptr, ' ; ' , 'W'); 
UTL_SC AN_TOKENlZE(fptr , ' > \'\V); 

if (!ReadAField( fptr, Xsize[ Fd ] + R, *(X[Fd] + R) + i )) goto error; 
FOrder[ Fd ] = 0; 

15 } 

} while (fptr); 

} 

fclose( InputSourceFile ); 
/* set up X - Y distance vectors */ 
20 if (realloc) for (F = 0; F < nFType; F + +) for (ns = 0; ns < nSym; ns + +) { 

sdptr = X2Y[ns]; 

if (sdptr[R]) UTL_MEM_FREE( sdptr[R] ); 

if (!( sdptr[ R ] = (double *) UTL_MEM_ALLOC( sizeof( double ) * nX[R] ) )) 
return 0; 

25 for (i = 0, dp = sdptr[R]; i < nX[R]; i++) *dp++ = -1.0; 

} 

} 

return 1; 

error: u 
30 fprintf( stdout, "topsim failed reading line %d of %s.\nLast line read was %s.\n , 

i, Xfile[R], line ); 
return 0; 

} 
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char **ParseQuotedString( SDetails, nDetail, Weights ) 
char *SDetails; 
int *nDetail; 
double **Weights; 

5 { 

char *pch, **ppch, *wch, **Details; 
int i; 

double *wt; 

/* first trim string to remove leading/trailing spaces and quotes */ 
10 while ('SDetails = = i ! ^Details = = ' ') SDetails* + ; 
pch = SDetails + strlen( SDetails ) - 1; 
while (*pch == 1 i *P<* == ") *!**-- = ,X0 ' ; 
/* each space is token delimiter */ 

for (i = 0, pch = SDetails; *pch; pch++) 
15 if (*pch = = ' ') i + + 

*nDetail = i+1; 

if (lOMdb - (Char «) UTL_MEM_ALLOC( si Z eof( char • ) («»^) ) )) 

return 0; 

20 " - (aou W e •) UTL_MEM__ALLOC( s,zeo f ( do ubl e , • (— ) )) 

return 0; 

wt = *Weights; 

} 

pch = SDetails; 
25 if (*pch == "") pch+ + ; 

for (i = 0, ppch = Details; i < ^Detail; i+ + , ppch + +) { 
UTL_SCAN_TOKENlZE(pch,' YW); 
*ppch = UTL_STR_SAVE( pch ); 
if (Weights) { 
30 /* note, the copy is now being modified */ 

if ((wch = strstr( *ppch, " = "))){ 

if (!isweight( wch + 1 )) return FALSE; 
*wt = atof( wch + 1 ); 
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*wch = *\0'; 

} 

else *wt = 1.0; 
wt++; 

5 } 

pch + = strlen( pch ) + 1; 

} 

retum( Details ); 

} 

10 int isweight( s ) 

/* returns true if value is a positive decimal value */ 

char *s ; 

{ 

char *c; 

15 for (c = s; *c; C++) if (KsdigW *c ) && ( *c != V )) { 

fprintf( stdout, "Bad weight value: %s. AbortingAn", s ); 
return( FALSE ); 

} 

return( TRUE ); 

20 } 

int ParseRxnO 

/* parses complex input descriptions */ 
{ 

char **ParseQuotedStringO, **scratch; 
25 int nRW, i, nX; 
double wtsum; 

/* parse field type information or set up standard (steric) type only */ 
if (FieldTypes) { 

if (.(FTypes = ParseQuotedStrin g ( FieldTypes, &nFType, &FWe lg hts ) )) return 0, 

30 /* scale to average weight of unity */ 

for (i = 0, wtsum = 0.0; i < nFType; i++) wtsum += FWeights[i]; 

wtsum /= (double) nFType; 

for (i = 0; i < nFType; i++) FWeights[ i ] /= wtsum; 
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} 

else { 

nFType = 1; 

if (!( FTypes = (char **) UTL_MEM_ALLOC ( sizeof( char * ) ) )) return 0; 
5 if (!( *FTypes = UTL_STR_SAVE( "CTOPS" ) )) return 0; 

if (!( FWeights = (double *) UTL_MEM_ALLOC( sizeof( double ) ) )) return 0; 
*FWeights = 1.0; 

} 

if (!(FOrder = (char **) UTL_MEM_ALLOC( sizeof(char *) * nFType ) )) return 0; 
10 /* parse any reaction type information present */ 
nR = 0; 

if (SideChainOnly) { 
NoCore = TRUE; 
return 1; 

15 } 

if (IReactionNeeded) return 0; 
if (ScratchDetails) { 

if (!(ReactionDetails = ParseQuotedString( ScratchDetails, &nDetail, NIL ) )) return 0; 
nR = nDetail; 
20 if (UnitXarraysO) return 0; 
} 

if (!(FROrder = (char **) UTL_MEM_ALLOC( sizeof(char *) * nFType * nR ) )) return 
0; 

/* parse any user-provided variation weighting */ 
25 CoreWeight =1.0; 

if (!( RWeights = (double *) UTL_MEM_ALLOC( sizeof( double ) * nR ) )) return 0; 

if (XWeights) { 

if (!(scratch = ParseQuotedString( XWeights, &nRW, NIL ) )) return 0; 
/* scratch will just be unfreed memory */ 
30 nX = nR + (NoCore ? 0 : 1); 

if (nRW ! = nX ) { 

fprintf( stdout, "Mismatch between count of xweights (%d) and needed 
(%d).\n", nRW, nX ); 
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return 0; 



} 



for (i = 0, wtsum 
FALSE; 



= 0.0; i < nR; i++) if (!isweight( scratch[ i ] )) return 



10 



15 



else { 

RWeights[ i ] = atof( scratch[ i ] ); 
wtsum += RWeights[ i ]; 

} 

if (INoCore) if (!isweight( scratch[ nR ])) return FALSE; 
else { 

CoreWeight = atof( scratch[ nR ] ); 
wtsum + = CoreWeight; 

} 

wtsum /= (double) nX; 

for (i = 0; i < nR; i + + ) RWeights[ i ] /= wtsum; 
if (!NoCore) CoreWeight /= wtsum; 



} 



else for (i = 0; i < nR; i++) RWeights[ i ] = 1.0; 
return 1; 

20 } 

int ReadEverythingO 

{ 

if (IMasterFile && IBitsetFile) return 0; 
if (IParseRxnO) return 0; 
25 setbits_nbits_InitO; 

if (UnitMasterFileO ) return 0; 

if ('.InitCoreO ) return 0; 

if (!WhatsTheDifference()) return 0; 

if (IRetrievelnputO ) ret urn °! 
30 return 1; 
} 

static int InitSym( nsym ) 
int nsym; 
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{ 

/* sets up symmetries to consider as described for core 
ONLY 2 reactants considered for now! 

assumes that CoreNow is pointing to the appropriate structure */ 
5 int i, F, maxsym; 
double **x2y; 

/* get symmetry from current core molecule if not supplied by caller */ 
nSym = nsym; 
if ( InSym ) { 

10 if ((!strstr( CoreNow, "SYM = " )) j i (strstr(CoreNow, "SYM=0")) ) nSym = 1; 

if (strstr(CoreNow, "SYM = 1")) nSym = 2; 
/* add more categories here */ 
} 

for (i = 0; i<nSym; i++) CoreSyms[ i ] = 1; 
15 /* allocate distance arrays to max possible for nR */ 
if (!X2Y) { 

for (maxsym = 1, i = 0; i < nR; i + +) maxsym * = (i+D; 

if (!(X2Y = (double ***) UTL_MEM_ ALLOC ( sizeof( double **) * nFType ) )) 

return 0; 

20 for (i = 0; i < maxsym; i++) { 

if (!(X2Y[i] = (double**) UTL_MEM_ALLOC ( sizeof( double *) * nR) )) return 0; 

memset( X2Y[i], 0, sizeof( double *) * nR ); 

} 

} 

25 return nSym; 
} 

int ReadCoreTopomers( CoreOK ) 

int *CoreOK; 

{ 

30 /* returns 1 unless fatal error. Sets CoreOK to TRUE if this mf entry is OK 
Also sets up symmetry considerations (which are core structure dependent), 
assumes that CoreNow is pointing to the appropriate structure */ 
int foo, i, R, F, Fd, Rd, rf, skipcore, ns, *Sym; 
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char label[15], *nxTop, *cstart, *fptr; 

r„ V TY_« »my-" "NZ=" ,, CX=","CY = ", M CZ="}; 
char *cnamesQ = {"NX= , NY- , , , 

double coo; 
double atofO; 
5 skipcore = NoCore; 

/* always consider both matches iff no core */ 
if(skipcore) InitSym( nR ); 

else skipcore = ! InitSym(0); 
/* check for any symmetry-allowed rxn by rxn match of all reactant name "details" */ 
10 for (ns = 0; ns < nSym; ns++) if (CoreSyms[ns]) { 
Sym = *(SymList + ns); 
*CoreOK = TRUE; 
if (ISideChainOnly) 
for (i = 0; i < nR && *CoreOK; i + +) 
15 if (!strstr( ReactionDetails[ Sym[ i ] ], Xname[ i ] )) 

*CoreOK = FALSE; 
if (*CoreOK) break; 

} 

if (skipcore ! ! CorelsSame 1 1 !(*CoreOK )) return 1; 
20 nxTop = CoreNow; 

/* read left-to-right, so record all starting points; 
assume that coords are bunched and appear only once 

*/ 

for (F = 0; F < nFType; F + +) for (R = 0; R < nR; R++) { 
25 sprintf( label, "%s%d", FTypes[ F ], R + 1 ); 

if (!( FROrder[ F * nR + R ] = strstr( nxTop, label ) ) ) { 
/* some requested datum missing; then this core entry has no topomer data; use it */ 
*CoreOK = 0; 
return 1; 

30 } 
} 

cstart = strstr( nxTop, cnames[ 0 ] ); 
do { 
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/* find next datum in left-to-right order */ 

for (F = 0, fptr = 0; F < nFType; F++) for (R = 0 ; R < nR; R++) { 
rf = F * nR + R; 

if (FROrder[rf] && (lfptr | i FROrder[rf] < fptr)) {fptr = FROrder[rf]; Fd = F; 
5 Rd = R;} 

} _ 

if (cstart && (lfptr | ! cstart < fptr)) {fptr = cstart; Fd - -1; } 

if (fptr) { 

/* unpack next piece of data to proper location */ 
10 if (Fd > = 0) { 

/* then datum is a field */ 

fptr += strlen( FTypes[ Fd ] ) + 2; /Skipping "CTOPn = " */ 

UTL_SCAN_TOKENIZE(fptr, ' ; ' , *\V); 
UTL_SCAN_TOKENIZE(fptr , ' > ' , 'W'); 
15 if (!ReadAField( fptr, Xsize[ Fd ] + Rd, Y[Fd] + Rd )) return 0; 

FROrder[ Fd * nR + Rd ] = 0; 

} 

else { 

for (i = 0; i < 6; i++) { 
20 /* the next data are coordinates */ 

/* read coords, save as distances squared */ 
cstart = strstr( cstart, cnames[i]); 
if (Icstart) { 
/* then this core entry has no topomer data */ 
25 *CoreOK = 0; 

return 1; 

} 

cstart + = strlen(cnames[i]); 
UTL_SCAN_TOKENIZE(cstart, ' ; ' , 'W); 
30 UTL_SCAN_TOKENIZE(cstart,' > ' ,'W); 

coo = CXcoords[ i ] - atof(cstart); 
CXdiffsq[ i ] = coo * coo * DWeight; 
cstart += strlen( cstart ) + 1; 
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} 

cstart = 0; 
} 

} 

5 } while (fptr); 
return 1; 

} 

int CoreMatches( CoreOK ) 
int *CoreOK; 
10 { 

/* returns 1 unless fatal error. Sets CoreOK to FALSE if no compound having 
this core can possibly match */ 
int F, R, i, ns, *Xct, ct; 
double sqrtO, totd, xount, cdiff; 
15 unsigned char *ptr, *qtr; 

if (NoCore | | CorelsSame) { 
"CoreOK = TRUE; 
return 1; 

} 

20 /* can check for coordinate discrepancy fast! */ 

for (i = 0, cdiff = 0.0; i < 6; i + +) cdiff + = CXdiffsqM; 
if (cdiff > Distance) { 

*CoreOK = FALSE; 

return 1; 

25 } 

for (F = 0, totd .= cdiff; F < nFType; F++) for (R = 0; R < nR; R++) { 
if (totd > Distance) break; 
ptr = (unsigned char *) *(Y[ F ] + R); 
qtr = (unsigned char *) *(Yin[F] + R); 
30 if (!ptr ! | !qtr) xount = 999999.0; 

else for(xount=0.0, i = 0; i < *(Xsize[F] + R); i+ + , P*+ + , qtr++) 
xount += Dist[ *ptr&0x0F ][ *qtr & OxOF 1 

+ Dist[ (*ptr & OxFO) > > 4][ (*qtr & 0xF0) > > 4] ; 
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totd + = xount * FWeights[ F ] / (double) nR; 

} 

CoreDistance = totd * CoreWeight; 
*CoreOK = totd < = Distance; 
5 return 1; 
} 

int FindXMatches 0 { 
int R, F, i, ns, ct, *Sym, size, what ; 
double totd, d, **sdptr, *dptr, xount; 
10 unsigned char *ptr, *qtr; 

/* reinitialize indices for permuting over all products -- 

code is general for any number of variable positions */ 
for (i = 0; i < nR; i++) Xct[i] = 0; 

AddressSize(nR, nX, &size); 
15 size = (size + 31 )/32 * 4; 

if (bitset) /* assumes actuallsizes matches current sizes!*/ 

{ 

if (!(Dead_Products = (int *) UTL_MEM_ALLOC (size))) re 
CS PRDCT_BITSET_TO_RAW( bitset, Dead_Products, 0); 
20 not_here(Dead_Products,size ); 

} 



while ( TRUE ) { 
/* exit elsewhere when all products are enumerated */ 
IndexesToAddress( nR, nX, &what, Xct); 
25 if (Dead_Products && 

TestDead(0, what) ) goto tupledone; /* not doing this 

for (ns = 0; ns < nSym; ns++) if (CoreSyms[ns]) { 
/* process all symmetries of current side chain combo */ 
Sym = *(SymList + ns); 
30 sdptr = *(X2Y + ns); 

for (R = 0, totd = CoreDistance; R < nR; R + +) { 
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if (totd > Distance) break; 
/* compute next distance if not already done - DEP knows how this works! •/ 
dptr = (*(sdptr + R )+Xct[R]); 
if ((*dptr) < 0.0) for (F = 0; F < nFType; F + + ) { 
5 p tr = (unsigned char *) *( *(X[F] + R) + Xct[ R ]); 

qtr = (unsigned char *) *(Xin[ F ] + Sym[ R ]) ; 
if (!ptr ! ! !qtr) {*dptr = 999999.0; break;} 
else { 

for(xount=0.0, i=0; i < *(Xsize[F] + R); + . ptr+ + , q*++) 
10 xount + = Dist[ *ptr & OxOF ][ *qtr & OxOF ] 

+ Dist[ (*ptr & OxFO) > > 4][ (*qtr & OxFO) > > 4] ; 
*dptr += xount * FWeights[ F ]; 
} 

} 

15 totd + = *dptr * RWeights[ R ]; 

} 

/* if hit, write it out */ 

if (totd < = Distance) { 
if (NotBitOutput || nR ! = 2) { 
20 /* ASCII index form of output - also REQUIRED if more than 2 varying elements */ 
if ('.OutputFile && !OpenOutputFile() ) return 0; 

for (R = 0; R < nR; R++) fprintf( OutputFile, "%6d XctfR] + 1 ); 
fprintf( OutputFile, «%6d%8.2f%8.2f%8.2f\n", StartCore, 

sqrt(totd), sqrt(CoreDistance), sqrt(totd - CoreDistance) ); 

25 ) 

else { 

if (!Good_Products ) { 

if 0(Good_Products = (int *) UTL_MEM_ ALLOC ( size ) )) return 

0; 

30 memset( Good Products, 0, size ); 

} 

FlagProduct(Good_Products, 0, 0, what ); 
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nout+ + ; 

if (NoMorehitsPlease && nout > = NoMorehitsPlease) goto done; 
/* output only one acceptable symmetry per product */ 
goto tupledone; 
5 } 
} 

/* generate next index tuple, AKA candidate product */ 
tupledone: 

ct = nR - 1; 
10 while ( TRUE ) { 

Xct[ ct] + + ; 

if (Xct[ ct ] < nX[ ct ]) break; 
/* finished when first index exceeds limit - the other exit */ 
if (ct = = 0) goto done; 
15 Xct[ ct ] = 0; 

ct--; 

} 

} 

done: 

20 /* output any products from this dataset */ 
if (NotBitOutput J ', nR ! = 2) { 
if (OutputFile) fclose(OutputFile); 
OutputFile = 0; 

} 

25 else if (Good_Products) { 
WriteStdFileO; 

UTL_MEM_FREE( Good_Products ); 
Good_Products = (int*) 0; 

} 

30 return 1; 
} 

int MakeOutputFileName() { 

/* a run may produce multiple files, and the user probably can't tell, 
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so append a sequence J to subsequent base names */ 

if (InOutFiles) { 

sprintf( OutputFileName, "%s", OutputFileBase ); 

/* get base name ready for next call */ 
5 strtok( OutputFileBase, "." ); 

} 

else sprintf( OutputFileName, "%s_%d.%s", OutputFileBase, 

nOutFiles, OutputFileBase + strlen(OutputFileBase) + 1 ); 

nOutFiles+ + ; 

10 } 

int WriteStdFileO { 
/* writes out the bit set of products */ 
int sizes[2]; 
int allocSizes[2] ; 
15 int numInSites[2] ; 
void ""compressed ; 
int total ; 

sizes[0] = nX[0] ; 
sizes[l] = nX[l] ; 
20 numInSites[0] = numInSites[l] = -1 ; 

allocSizes[0] = allocSizes[l] = -1 ; 
compressed = NIL; 
total = 0; 
MakeOutputFileNameO ; 
25 WriteOutCheckPointFile( OutputFileName, 

MasterFile, 
MasterRecord, 
comline, 
Good_Products, 

30 0, 
2, 

sizes, 

allocSizes, 
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nout, 

numlnSites, 
total, 

compressed); 

5 > 

int ReadNextCore( SideChainsAreSame, CorelsSame ) . 
int *SideChainsAreSame; 
int *CoreIsSame; 

,0 !• continues reading through master file for more matching Reaction Classes. 

If the side chain Hies have the same name, can skip rebuild of X diffs 
char *foo; 

int i, d, rxMatch = 0, val; 
if (AllCores) { 

15 if ( .1 == UTL_SCAN_GETS( CoreFile_File, "\\", ■#", &*»)) fclos* 

CoreFile_File ); 
else { 

/* get next core ready and quit */ 

CoreNow = UTL_STR_SAVE(foo); 
20 *SideChainsAreSame = TRUE; 

StartCore+ + ; 
return 1; 

} 

} 

25 while ( '.rxMatch ) { 

if ( .1 == UTL_SCAN_GETS( MasterFile_File, "\Y\ &*»)> return 0; 

/* preliminary match if (1) Reaction Needed matches and (2) 

NO_core must be present if NoCore is TRUE (or vice versa) */ 
rxMatch = ( strstr(fbo, "Reaction class ") && strstr(foo, ReactionNeeded) 
30 && ((INoCore && !strstr( foo, "NO_core" ) ) 

j j ( NoCore && strstr( foo, "NO_core" ) ) ) ); 
if (feof(MasterFile_File)) return 0; 
/* skip name, record number of reagents */ 



# 
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if (rxMatch) { 

if ( -1 = = UTL_SCAN_GETS( MasterFileJFile, "\\" , &foo)) return 0; 
if ( .1 == UTL_SCAN_GETS( MasterFile_File, "\V\ &foo)) return 0; 
if ( ! TJTL_STR_ATOI(foo, &val) ) return 0; 
if (val ! = nR) rxMatch = 0; 



} 



if (rxMatch) { 

/* skip fgpt stuff, record core and side chain file stuff */ 

if ( -1 == UTL_SCAN_GETS( MasterFile_File, "W", &foo)) return 0; 
if ( -1 «= UTL_SCAN_GETS( MasterFile_File, "\\", &foo)) return 0; 
if ( -1 == UTL_SCAN_GETS( MasterFile_File, "W, &fbo)) return 0; 
*CoreIsSame = TRUE; 
if (strcmp( foo, Corefile )) { 

*CoreIsSame = FALSE; 

UTL_MEM_FREE( Corefile ); 

Corefile = UTL_STR_SAVE(foo); 

if ( A == UTL_SCAN_GETS( MasterFile_File, "\\\ &foo)) return 0; 
if ( ! UTL_STR_ATOI(foo, &val ) ) return 0; 

if (val != StartCore ) *CoreIsSame = FALSE; 
StartCore = val; 
if (! *CoreIsSame ) { 

if (CoreFile_File) fclose(CoreFile_File); 

if (! (CoreFile_File = fopen(Corefile,"r"))) return 0; 

i=0; 

while ( i < StartCore ) { 

if ( -1 = = UTL_SCAN_GETS( InputSourceFile, "W" , &foo)) return 0; 

if (AllCores) break; 

i++; 

} 

CoreNow = UTL_STR_SAVE( foo ); 

} 
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if , , — UTL SCAN GET* Ma S .erFile_Rle, "W, V. Moo)) Man 0; 
if < 4 == UTLlsCANloETSt Ma S t=rFH=_File, V, «»» "turn 0; 
*SideChainsAreSame = TRUE; 

for (i = o; i < nR; i + +) if <!W*fita( *• SideChainsAreSame ) ) return 0, 

5 } 
} 

return 1; 

/* this belongs in the utl module, actually */ 
10 int MakeComLine( char -line, int ten, int argc, char **argv) 

{ 

int i, nch, totch = 0; 
sprintf(line,"%s ",argv[0]); 
for(i=l;i<argc && totch < = len;i++) 

15 { 

nch = strlen(line); 
line + = nch; 
totch + = nch; 

if (totch < len ) sprintf(line,"%s ",argv[i]); 

20 } 
} 

int CheckPointProgram(void) { 
fpnntf(stderr,»CheckPointProgram() is a lonely stub in topsim.cAn ); 

} 

25 int main( argc, argv ) 
int argc; 
char **argv; 

{ 

int processing; 
30 if( ! Parse Arguments( argc, argv ) ) 

goto SyntaxError; 
MakeComLine( comline, 2048, argc, argv ); 
if (JReadEverythingO) g° to FailureExit; 
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processing = 1; 
while (processing) { 

if (! ReadCoreTopomers( &CoreOK )) goto FailureExit; 
if (CoreOK && !CoreMatches( &CoreOK )) goto FailureExit; 
5 if (CoreOK && !ReadXs()) goto FailureExit; 

searched += combi; 

if (CoreOK && IFindXMatchesO) goto FailureExit; 
totnout + = nout; 
nout = 0; 

10 processing = ReadNextCore ( & SideChainsAreSame, & CoreIsSame ) && 

(INoMorehitsPlease i | nout < NoMorehitsPlease); 

} fprintf(stdout, "Normal Exit: %d of %f are neighbor^ " , totnout, searched ); 
User Aborted ? exit(ErrorExit) : exit(GoodExit); 

15 SyntaxError: 
exit(l); 
FailureExit: 

exit(ErrorExit); 

} 

20 /* 

numVariations is number of dimensions Y_01, Y_02 etc (normally 2) 
dsize contains the nY_01, nY_02 etc 
address is the bit number (0 to N-l) 

choices will contain the offsets (0 based) of Y_01, Y_02 etc. on return 
int AddressToIndexes(int numVariations, int 'allPtr, int address, int *chPtr ) 

1 for ( chPtr + = (numVariations - 1 ), allPtr + = (numVariations - 1) ; 

numVariations— ; 
30 allPtr-, chPtr- ) 

{ 

*chPtr = address % *aliPtr; 
address = address / *a!!Ptr; 



# 
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} 

return 1; 

} 

int IndexesToAddress(int num Variations, int ♦allPtr, int 'address, int *ind) 

5 { 

int i ; 

int indx = 0 ; 

for (i=0;i<numVariations;i + +) 
indx += indx * allPtrfi] + ind[i]; 
10 'address = indx; 

return 1 ; 

} 

int AddressSize(int num Variations, int *allPtr, mt *size) 
{ 

15 for ( *size = 1 ; -numVariations; allPtr++) *size *= *allPtr; 
return 1; 

} 



int not_here( what, nbytes ) 
unsigned char *what; 
20 int nbytes; 
{ 

for ( ; nbytes; -nbytes) *what++ = ~*what; 
return 1; 

} 
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Ap pendix "T" 



©macro FragCTOPS ChSp 



5 # Entry point for Web-based topomeric search initialization 

I sets up a set of topomeric searches, by identifying topomer data arising 
from 

10 » substructure! searcmng of SLN pauems found in topfrag.tbl to the 

, qu ery structure and generating the topomenc data and search command file 
entry 

# for all resulting fragmentations of the query structure. 

15 # The Query SLN(s) are assumed to be referenced by $CS_QUERY; 

# The file(s) to be searched are referenced by $CS_DATASET (space 
separated) 

# The directory where command files are to be written is $CS_TEMPDIR 

# The GUI parameters are to be in $CS_PARAMETERS 
20 # The name of the output file(s) is to be in $CS_OUTPUT 

# read in the data 
globalvar CTOP 
globalvar ACD'.TopInited 

localvar fcmdn fcmd tsln dist t base mf mfo nln nxid ferr ferrn rxids dott 
25 # check the input parameters 

setvar ferrn %cat( $CS_TEMPDIR "/CSerror.log" ) 
setvar ferr %open( Sferrn "w" ) 
setvar flogn %cat( $CS_TEMPDIR '7topfrag.log" ) 
setvar flog %open( $flogn "w" ) 
30 setvar fcmdn %cat( $CS_TEMPDIR "/CSCommands.cmd" ) 
setvar fcmd %open( Sfcmdn "w" ) 
if %not( $fcmd ) 

%write( Sferr could not open temp file Sfcmdn to write ChemSpace search 
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cmds. Quitting) >$nulldev 
return 
endif 

for tsln in $CS_QUERY 
5 if %pos( V Stsln) 

setvar nogood TRUE 
if %pos( "<" $tsln) 

if %gt( %pos( V $tsln ) %pos( - < " Stsln ) ) 

setvar nogood 
10 endif 
endif 

if $nogood 

%write( Sferr Topomeric searches revive a monomolecuiar search target. 

Quitting ) >$nulldev 
15 goto error 

endif 
endif 

%write( $Hog QUERY: Stsln >$m>lldev 

setvar dis. %CSj,ara mJ ,arse( distance $CS_PARAMETERS 91.0 , 

20 if %not( $dist ) 

ftwrit* $ferr No topomeric distance provided. Quitting ) >$nulldev 

goto error 
endif 

setvar priority %CS J ,aram J »rse< priority $CS_PARAMETERS 3.0 ) 

25 if %not( Spriority ) 

%write( Sferr No reaction priority provided. Quitting ) >$nulldev 

goto error 
endif 

%write< $tog Fragment Priority: Jpriority ) > Snulldev 
30 setvar CTOP[ ONLY1 1 SCSjaramjarse* only.subs SCS.PARAMETERS ) 

if $CTOP[ ONLY ] 

%write( $flog Matching Side Chain Only ) >$nuUdev 

endif 
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setvar CTOP[ WEIGHTS ] %CS_param_parse( xweights $CS_PARAMETERS ) 

if $CTOP[ WEIGHTS ] 

%write( $flog User Specified Weighting as: $CTOP[ WEIGHTS ] ) >$nulldev 

for w in $CTOP[ WEIGHTS ] 
5 setvar pats %search2d( $tsln %arg( 1 %set_unpack( $w ) ) NoDup 0 y ) 

if %not( Spats ) 

%write( Sferr Weighted search for fragment %arg( 1 %set_unpa C k( $w ) ) 

not 

in $tsln » can't happen! ) >$nulldev 
IQ goto error 

else 

if %gt( %count( $pats ) 1 ) 

%write( Sflog NOTE: Multiple hits for weighting fragment %arg( 1 

%set_unpack( $w ) ) in $tsln ) >$nulldev 
15 endif 
endif 
endfor 
endif 

setvar CTOP[ CHBD ] %CS_param_parse( hbonding $CS_PARAMETERS ) 

20 if $CTOP[ CHBD ] 

%write( $flog FIELDS include Hydrogen Bonding with weight of $CTOP[ CHBD ] 

) 

> $nulldev 
endif 

25 zap ml >$nulldev 

%sln_to_mol( ml Stsln ) >$nulldev 

if %molempty( ml ) 

%write( $ferr SYBYL cannot handle search target (SLN is: Stsln ). 

Quitting ) >$nulldev 
30 goto error 

endif 

setvar t %mol_info( ml N ATOMS ) 
FILLVALENCE Ml(*) H 1.0 1.5 1.0 1.5 >$nulldev 
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if $CTOP[ 0NLY1 ] 

if %neq( %mol_info( ml NATOMS ) %math( $t + 1 ) ) 
%write( $ferr Side chain search but target $tsln has other than one 
unfilled valence ) >$nulldev 
5 goto error 

endif 

else 

if %neq( %mol_info( ml NATOMS ) $t ) 

%write( $ferr Search Target Stein has unfilled valences. Quitting ) 

10 > Snulldev 

goto error 
endif 
endif 

if $CTOP[ ONLY1 ] 
15 # only one side chain to model is a special case 
CTOP'.SideChainOnly $fcmd $ferr $flog $dist 

else 

# check for custom topomer fragmentation table or selection 
setvar tftabn 
20 setvar tfrows 

if $CS_TOPFRAG 

setvar t %pos( "_" $CS_TOPFRAG ) 
if %not( $t ) 

%write( Sferr Custom table name $CS_TOPFRAG missing an "_" ) >$nulldev 
25 goto error 

else 

setvar tftabn %substr( SCSJTOPFRAG 1 %math( $t - 1 ) ) 
setvar tfrows %substr( SCSJTOPFRAG %math( $t + 1 ) ) 
endif 
30 endif 

if %set_and( "%set_create( %table_name() )" TOPFRAG ) 

table close TOPFRAG 
endif 
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if %not( $tftabn ) 

setvar tftabn %cat( $DSERV_TB topfrag.tbl ) 

endif 

table recall $tftabn > $nulldev 

if %not( %set_and( "%set_create( %table_nameO )" TOPFRAG ) ) 

%write( $ferr $tftabn not found. Quitting ) >$nulldev 

goto error 
endif 

%write( $flog Topomer fragmentation table is %cat( $DSERV_TB topfrag.tbl 
) ) > $nulldev 

# initialize random file name sequence generator 
setvar t %timeO 

setvar base %rand( %substr( "$t" %math( %strlen( "$t" ) - 6 ) 2 ) ) 
TAILOR SET MAXIMIN2 MAXIMUM ITERATIONS 1000 j | 
%write( $flog Master file(s): $CS_DATASET ) >$nulldev 
%write( $flog TOPFRAG table: $tftabn -- Row selection: $tfrows ) 
> $nulldev 

if %not( $tfrows ) 

setvar tfrows %set_create( %range( 1 %table_attribute( NROWS ) ) ) 

endif 

for rxid in %set_unpack( $tfrows ) 
ft processing ... 

%write($flog ) >$nulldev 

# cheek priority 

TABLE Default TOPFRAG 

if %gt( %rcell( $rxid PRIORITY ) $priority ) 

%write( $flog TOPFRAG entry $rxid priority > $priority. ) >$nulldev 

break 
endif 

setvar CTOP[RxnCount][$rxid] 0 

if %CS_ReactantMatch( $rxid $fcmd Sferr $tsln $flog ) 

%write( $flog > > > Topomer search queueing (TOPFRAG row $rxid) ) 

> $nulldev 
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CS!Queue_Search $fcmd $rxid $dist $flog 

endif 

endfor 
endif 
5 endfor 

# may need to purge or rename error file here! 

%close( $fcmd ) 
%close( $ferr ) 
%close( $flog ) 
10 return 
error: 

%close( $fcmd ) 

# ensure nothing in search command file ! 

%file_delete( $fcmdn ) >$nulldev 



